You can install the released version of ConformalSmallest from CRAN with:
install.packages("ConformalSmallest")
Or directly from github
::install_github("Elsa-Yang98/ConformalSmallest") devtools
This is a basic example which shows you how to solve a common problem:
library(ConformalSmallest)
## basic example code
library(glmnet)
library(MASS)
library(mvtnorm)
source("ginverse.fun.R")
source("functions.R")
=paste("linear_fm_t3",sep="")
name
<- 3 #degrees of freedom
df <- 60 #number of dimensions
l <- 100
l.lambda <- seq(0,200,l=l.lambda)
lambda_seq <- round(seq(5,300,l=l))
dim <- 0.1
alpha <- 200 #number of training samples
n <- 100 #number of prediction points
n0 <- 100 #number of independent trials
nrep <- 0.5
rho
<- len.efcp <- matrix(0,nrep,l)
cov.efcp <- len.vfcp <- matrix(0,nrep,l)
cov.vfcp <- len.naive <- matrix(0,nrep,l)
cov.naive <- len.param <- matrix(0,nrep,l)
cov.param <- len.star <- matrix(0,nrep,l)
cov.star <- len.cv10 <- matrix(0,nrep,l)
cov.cv10 <- len.cv5 <- matrix(0,nrep,l)
cov.cv5 <- len.cvloo <- matrix(0,nrep,l)
cov.cvloo
<- out.efcp.lo <- matrix(0,n0,l)
out.efcp.up <- out.vfcp.lo <- matrix(0,n0,l)
out.vfcp.up <- out.naive.lo <- matrix(0,n0,l)
out.naive.up <- out.param.lo <- matrix(0,n0,l)
out.param.up <- out.star.lo <- matrix(0,n0,l)
out.star.up <- out.cv10.lo <- matrix(0,n0,l)
out.cv10.up <- out.cv5.lo <- matrix(0,n0,l)
out.cv5.up <- out.cvloo.lo <- matrix(0,n0,l)
out.cvloo.up
for(i in 1:nrep){
cat(i,"\n")
for (r in 1:l){
<- dim[r]
d set.seed(i)
<- matrix(rho,d,d)
Sigma diag(Sigma) <- rep(1,d)
<- rmvt(n,Sigma,df) #multivariate t distribution
X <- rep(1:5,d/5)
beta <- rt(n,df)*(1+sqrt(X[,1]^2+X[,2]^2))
eps <- X%*%beta+eps
Y
<- rmvt(n0,Sigma,df)
X0 <- rt(n0,df)*(1+sqrt(X0[,1]^2+X0[,2]^2))
eps0 <- X0%*%beta+eps0
Y0
<- ginverse.fun(X,Y,X0,alpha=alpha)
out.param <- out.param$lo
out.param.lo[,r] <- out.param$up
out.param.up[,r] <- mean(out.param.lo[,r] <= Y0 & Y0 <= out.param.up[,r])
cov.param[i,r] <- mean(out.param.up[,r]-out.param.lo[,r])
len.param[i,r]
<- efcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha)
out.efcp <- out.efcp$up
out.efcp.up[,r] <- out.efcp$lo
out.efcp.lo[,r] <- mean(out.efcp.lo[,r] <= Y0 & Y0 <= out.efcp.up[,r])
cov.efcp[i,r] <- mean(out.efcp.up[,r]-out.efcp.lo[,r])
len.efcp[i,r]
<- vfcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha)
out.vfcp <- out.vfcp$up
out.vfcp.up[,r] <- out.vfcp$lo
out.vfcp.lo[,r] <- mean(out.vfcp.lo[,r] <= Y0 & Y0 <= out.vfcp.up[,r])
cov.vfcp[i,r] <- mean(out.vfcp.up[,r]-out.vfcp.lo[,r])
len.vfcp[i,r]
<- naive.fun(X,Y,X0,alpha=alpha)
out.naive <- out.naive$up
out.naive.up[,r] <- out.naive$lo
out.naive.lo[,r] <- mean(out.naive.lo[,r] <= Y0 & Y0 <= out.naive.up[,r])
cov.naive[i,r] <- mean(out.naive.up[,r]-out.naive.lo[,r])
len.naive[i,r]
<- star.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha)
out.star <- out.star$up
out.star.up[,r] <- out.star$lo
out.star.lo[,r] <- mean(out.star.lo[,r] <= Y0 & Y0 <= out.star.up[,r])
cov.star[i,r] <- mean(out.star.up[,r] - out.star.lo[,r])
len.star[i,r]
<- cv.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha,nfolds=5)
out.cv5 <- out.cv5$up
out.cv5.up[,r] <- out.cv5$lo
out.cv5.lo[,r] <- mean(out.cv5.lo[,r] <= Y0 & Y0 <= out.cv5.up[,r])
cov.cv5[i,r] <- mean(out.cv5.up[,r] - out.cv5.lo[,r])
len.cv5[i,r]
}
}
<- data.frame(dim,apply(cov.param,2,mean),apply(cov.naive,2,mean),apply(cov.vfcp,2,mean),apply(cov.star,2,mean),apply(cov.cv5,2,mean), apply(cov.efcp,2,mean))
df.cov
<- data.frame(dim,apply(len.param,2,mean),apply(len.naive,2,mean),apply(len.vfcp,2,mean),apply(len.star,2,mean),apply(len.cv5,2,mean), apply(len.efcp,2,mean))
df.len
save(dim,cov.param, cov.naive, cov.vfcp, cov.star, cov.cv5, cov.efcp, file = "cov100_t3.RData" )
save(dim,len.param, len.naive, len.vfcp, len.star, len.cv5, len.efcp, file = "len100_t3.RData" )
This output the right panal of Figure 1 in our paper.
<- 3
df <- 3
d <- 100
l.lambda <- seq(0,200,l=l.lambda)
lambda_seq <- c(50,100,500,1000,5000)
nset <- 0.1 #miscoverage level
alpha <- 100 #number of prediction points
n0 <- 100 #number of independent trials
nrep <- 0.5
rho
<- expand.grid(1:nrep, nset, c("efficient", "valid"))
evaluations <- nrow(evaluations)
no_eval <- cov_mat <- data.frame(number = rep(0, no_eval),
width_mat rep = evaluations[,1],
nset = evaluations[,2],
method = evaluations[,3])
colnames(width_mat) <- colnames(cov_mat) <- c("number", "rep", "sample size", "method")
<- matrix(rho,d,d)
Sigma diag(Sigma) <- rep(1,d) #covariance matrix for X
for(idx in 1:nrow(evaluations)){
set.seed(evaluations[idx, 1])
if(idx%%1 == 0){
print(idx)
}<- evaluations[idx, 2]
n
<- rmvt(n,Sigma,df) #multivariate t distribution
X
<- rt(n,df)*(1+sqrt(X[,1]^2+X[,2]^2))
eps1 <- rt(n,df)*(1+sqrt(X[,1]^4+X[,2]^4))
eps2 <- rpois(n,sin(X[,1])^2 + cos(X[,2])^4+0.01 )+0.03*X[,1]*eps1+25*(runif(n,0,1)<0.01)*eps2
Y
<- rmvt(n0,Sigma,df)
X0 <- rt(n0,df)*(1+sqrt(X0[,1]^2+X0[,2]^2))
eps01 <- rt(n0,df)*(1+sqrt(X0[,1]^4+X0[,2]^4))
eps02 <- rpois(n0,sin(X0[,1])^2 + cos(X0[,2])^4+0.01 )+0.03*X0[,1]*eps01+25*(runif(n0,0,1)<0.01)*eps02
Y0
3] <- cov_mat[idx, 3] <- n
width_mat[idx,<- evaluations[idx, 3]
method 4] <- cov_mat[idx, 4] <- method
width_mat[idx,2] <- cov_mat[idx, 2] <- evaluations[idx, 1]
width_mat[idx,
if(method == "valid"){
<- c(1/2, 1/2)
split else {
} <- 1/2
split
}
<- seq(1e-03, 4, length = 20)*alpha
beta_grid <- unique(ceiling(seq(1/10, 1, length = 20)*d))
mtry_grid <- seq(50, 400, by = 50)
ntree_grid
<- try(conf_CQR_reg(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha))
tmp
while (class(tmp)=="try-error"){
<- try(conf_CQR_reg(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE)
tmp
}1] <- tmp$width
width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0))
cov_mat[idx,
}
par(mfrow <- c(1,2))
<- width_vfcp <- sd_width_efcp <- sd_width_vfcp <- NULL
width_efcp #sd_efcp <- sd_vfcp <- NULL
for(n in nset){
<- width_mat[evaluations[,3] == "efficient", ]
TMP <- TMP[TMP[,3] == n,]
TMP_prime
<- width_mat[evaluations[,3] == "valid", ]
TMP <- TMP[TMP[,3] == n,]
TMP_prime_vfcp =TMP_prime_vfcp[ TMP_prime_vfcp[,1]<=10^5,1]
TMP_prime_vfcp_clean
<- c(width_efcp, mean(TMP_prime[,1] / TMP_prime_vfcp[,1]))
width_efcp <- c(sd_width_efcp, sd(TMP_prime[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
sd_width_efcp #sd_efcp = c(sd_efcp , sd(TMP_prime[,1])/sqrt(nrep) )
<- c(width_vfcp, mean(TMP_prime_vfcp[,1] / TMP_prime_vfcp[,1]))
width_vfcp <- c(sd_width_vfcp, sd(TMP_prime_vfcp[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
sd_width_vfcp #sd_vfcp = c(sd_vfcp , sd(TMP_prime_vfcp[,1])/sqrt(nrep) )
}
#plot(dim, width_efcp, type = 'l', ylim = range(c(width_efcp+sd_efcp)), lwd = 2)
plot(nset, width_efcp, type = 'l', ylim =c(-10,25), lwd = 2)
lines(nset, width_efcp - sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(nset, width_efcp + sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(nset, width_vfcp, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "red")
lines(nset, width_vfcp - sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
lines(nset, width_vfcp + sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
abline(h = 1)
<- cov_vfcp <-sd_cov_efcp <- sd_cov_vfcp <- NULL
cov_efcp for(n in nset){
<- cov_mat[evaluations[,3] == "efficient", ]
TMP <- TMP[TMP[,3] == n,]
TMP_prime <- c( cov_efcp, mean(TMP_prime[,1] ) )
cov_efcp <- c(sd_cov_efcp, sd(TMP_prime[,1])/sqrt(nrep))
sd_cov_efcp
<- cov_mat[evaluations[,3] == "valid", ]
TMP <- TMP[TMP[,3] == n,]
TMP_prime <- c(cov_vfcp, mean(TMP_prime[,1]))
cov_vfcp <- c(sd_cov_vfcp, sd(TMP_prime[,1])/sqrt(nrep))
sd_cov_vfcp
}plot(nset, cov_efcp, type = 'l', ylim = c(0, 1), lwd = 2)
lines(nset, cov_vfcp, type = 'l', col = "red", lwd = 2)
abline(h = 1-alpha)
save(nset,nrep,width_mat, cov_mat, evaluations, width_efcp, sd_cov_efcp, sd_width_efcp,width_vfcp, sd_cov_vfcp,sd_width_vfcp, cov_efcp, cov_vfcp, alpha, file = "pois-100-repetitions.RData" )
This output the right panal of Figure 1 in our paper.
= 3
df = 1 #x is of one dimension
d = c(400) #number of training sample
nset = seq(0,5,by=0.2) # a grid of test points for x
x_test = 0.1 #miscoverage level
alpha = 100 #number of independent trials
nrep = 100 #number of test samples y for each test prediction sample x
nrep2
<- expand.grid(1:nrep, nset, x_test, c("efficient", "valid","CQR"))
evaluations <- nrow(evaluations)
no_eval <- cov_mat <- data.frame(number = rep(0, no_eval),
width_mat rep = evaluations[,1],
nset = evaluations[,2],
X_test = evaluations[,3],
method = evaluations[,4])
colnames(width_mat) <- colnames(cov_mat) <- c("number", "rep", "sample size", "test_value","method")
for(idx in 1:nrow(evaluations)){
set.seed(evaluations[idx, 1])
if(idx%%1 == 0){
print(idx)
}<- evaluations[idx, 2]
n = evaluations[idx, 3]
x0
= as.matrix(runif(n,0,5))
X = rnorm(n)
eps1 = rnorm(n)
eps2 = rpois(n,sin(X[,1])^2 +0.1 )+0.03*X[,1]*eps1+25*(runif(n,0,1)<0.01*eps2)
Y
= as.matrix( rep(x0,nrep2) )
X0 = rnorm(nrep2)
eps01 = rnorm(nrep2)
eps02 = rpois(nrep2,sin(X0)^2 +0.1 )+0.03*X0*eps01+25*(runif(nrep2,0,1)<0.01*eps02)
Y0
3] <- cov_mat[idx, 3] <- n
width_mat[idx,<- evaluations[idx, 4]
method #width_mat[idx,5] <- cov_mat[idx, 5] <- method
#width_mat[idx, 2] <- cov_mat[idx, 2] <- evaluations[idx, 1]
if (method =="CQR"){
= 0.05
beta_fixed = 1
mtry_fixed = 100
ntree_fixed
= try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha))
tmp
while (class(tmp)=="try-error"){
= try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha),silent=TRUE)
tmp
}1] <- mean(tmp$pred_set(X0, Y0)[[2]])
width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[1]])
cov_mat[idx, else{ if(method == "valid"){
}<- c(1/2, 1/2)
split else {
} <- 1/2
split
}
<- seq(1e-03, 4, length = 20)*alpha
beta_grid <- unique(ceiling(seq(1/10, 1, length = 20)*d))
mtry_grid <- seq(50, 400, by = 50)
ntree_grid
= try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha))
tmp
while (class(tmp)=="try-error"){
= try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE)
tmp
}1] <- mean(tmp$pred_set(X0, Y0)[[2]])
width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[1]])
cov_mat[idx,
}
}
par(mfrow = c(1,2))
<- sd_width_cqr <- width_efcp <- width_vfcp <- sd_width_efcp <- sd_width_vfcp <- NULL
width_cqr #sd_efcp <- sd_vfcp <- NULL
for(x in x_test){
<- width_mat[evaluations[,4] == "efficient", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime
<- width_mat[evaluations[,4] == "valid", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime_vfcp =TMP_prime_vfcp[ TMP_prime_vfcp[,1]<=10^5,1]
TMP_prime_vfcp_clean
<- width_mat[evaluations[,4] == "CQR", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime_cqr
<- c(width_efcp, mean(TMP_prime[,1] / TMP_prime_vfcp[,1]))
width_efcp <- c(sd_width_efcp, sd(TMP_prime[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
sd_width_efcp #sd_efcp = c(sd_efcp , sd(TMP_prime[,1])/sqrt(nrep) )
<- c(width_vfcp, mean(TMP_prime_vfcp[,1] / TMP_prime_vfcp[,1]))
width_vfcp <- c(sd_width_vfcp, sd(TMP_prime_vfcp[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
sd_width_vfcp #sd_vfcp = c(sd_vfcp , sd(TMP_prime_vfcp[,1])/sqrt(nrep) )
<- c(width_cqr, mean(TMP_prime_cqr[,1] / TMP_prime_vfcp[,1]))
width_cqr <- c(sd_width_cqr, sd(TMP_prime_cqr[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
sd_width_cqr #sd_vfcp = c(sd_vfcp , sd(TMP_prime_vfcp[,1])/sqrt(nrep) )
}
#plot(dim, width_efcp, type = 'l', ylim = range(c(width_efcp+sd_efcp)), lwd = 2)
plot(x_test, width_efcp, type = 'l', ylim =c(-5,20), lwd = 2)
lines(x_test, width_efcp - sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(x_test, width_efcp + sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(x_test, width_vfcp, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "red")
lines(x_test, width_vfcp - sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
lines(x_test, width_vfcp + sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
lines(x_test, width_cqr, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "blue")
lines(x_test, width_cqr - sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "blue")
lines(x_test, width_cqr + sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "blue")
abline(h = 1)
<-sd_cov_cqr <-cov_efcp <- cov_vfcp <-sd_cov_efcp <- sd_cov_vfcp <- NULL
cov_cqr for(x in x_test){
<- cov_mat[evaluations[,4] == "efficient", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime <- c( cov_efcp, mean(TMP_prime[,1] ) )
cov_efcp <- c(sd_cov_efcp, sd(TMP_prime[,1])/sqrt(nrep))
sd_cov_efcp
<- cov_mat[evaluations[,4] == "valid", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime <- c(cov_vfcp, mean(TMP_prime[,1]))
cov_vfcp <- c(sd_cov_vfcp, sd(TMP_prime[,1])/sqrt(nrep))
sd_cov_vfcp
<- cov_mat[evaluations[,4] == "CQR", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime <- c(cov_cqr, mean(TMP_prime[,1]))
cov_cqr <- c(sd_cov_cqr, sd(TMP_prime[,1])/sqrt(nrep))
sd_cov_cqr
}plot(x_test, cov_efcp, type = 'l', ylim = c(0, 1), lwd = 2)
lines(x_test, cov_vfcp, type = 'l', col = "red", lwd = 2)
lines(x_test, cov_cqr, type = 'l', col = "blue", lwd = 2)
legend(0,0.2, legend=c("EFCP", "VFCP","CQR"),
col=c("black","red", "blue"), lty=1, cex=0.8)
abline(h = 1-alpha)