library(ConformalSmallest)
library(ggplot2)
This vignette is divided into three parts: (1) we explain how EFCP and VFCP are used for ridge regression and compare them with cross-validation (CV) and other parametric methods; (2) we generate the plots shown in our paper; (3) we compare the tuning parameters chosen by EFCP, VFCP, CV using two splits, and CV using three splits. A minimal call sketch follows the package loading below.
library(glmnet)
library(MASS)
library(mvtnorm)
#source("ridge_funs.R")
=paste("linear_fm_t3",sep="")
name
<- 3 #degrees of freedom
df <- 60 #number of dimensions
l <- 100
l.lambda <- seq(0,200,l=l.lambda)
lambda_seq <- round(seq(5,300,l=l))
dim <- 0.1
alpha <- 200 #number of training samples
n <- 100 #number of prediction points
n0 <- 100 #number of independent trials
nrep <- 0.5
rho
<- len.efcp_linear_fm_t3 <- matrix(0,nrep,l)
cov.efcp_linear_fm_t3 <- len.vfcp_linear_fm_t3 <- matrix(0,nrep,l)
cov.vfcp_linear_fm_t3 <- len.naive_linear_fm_t3 <- matrix(0,nrep,l)
cov.naive_linear_fm_t3 <- len.param_linear_fm_t3 <- matrix(0,nrep,l)
cov.param_linear_fm_t3 <- len.star_linear_fm_t3 <- matrix(0,nrep,l)
cov.star_linear_fm_t3 <- len.cv10_linear_fm_t3 <- matrix(0,nrep,l)
cov.cv10_linear_fm_t3 <- len.cv5_linear_fm_t3 <- matrix(0,nrep,l)
cov.cv5_linear_fm_t3 <- len.cvloo_linear_fm_t3 <- matrix(0,nrep,l)
cov.cvloo_linear_fm_t3
<- out.efcp.lo <- matrix(0,n0,l)
out.efcp.up <- out.vfcp.lo <- matrix(0,n0,l)
out.vfcp.up <- out.naive.lo <- matrix(0,n0,l)
out.naive.up <- out.param.lo <- matrix(0,n0,l)
out.param.up <- out.star.lo <- matrix(0,n0,l)
out.star.up <- out.cv10.lo <- matrix(0,n0,l)
out.cv10.up <- out.cv5.lo <- matrix(0,n0,l)
out.cv5.up <- out.cvloo.lo <- matrix(0,n0,l)
out.cvloo.up
for(i in 1:nrep){
  if(i %% 10 == 0){
    print(i)
  }
  for (r in 1:l){
    d <- dim[r]
    set.seed(i)
    Sigma <- matrix(rho, d, d)
    diag(Sigma) <- rep(1, d)
    X <- rmvt(n, Sigma, df)  #multivariate t distribution
    beta <- rep(1:5, d/5)
    eps <- rt(n, df) * (1 + sqrt(X[,1]^2 + X[,2]^2))
    Y <- X %*% beta + eps

    X0 <- rmvt(n0, Sigma, df)
    eps0 <- rt(n0, df) * (1 + sqrt(X0[,1]^2 + X0[,2]^2))
    Y0 <- X0 %*% beta + eps0

    #parametric intervals via ginverse.fun
    out.param <- ginverse.fun(X, Y, X0, alpha = alpha)
    out.param.lo[,r] <- out.param$lo
    out.param.up[,r] <- out.param$up
    cov.param_linear_fm_t3[i,r] <- mean(out.param.lo[,r] <= Y0 & Y0 <= out.param.up[,r])
    len.param_linear_fm_t3[i,r] <- mean(out.param.up[,r] - out.param.lo[,r])

    #EFCP intervals
    out.efcp <- efcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    out.efcp.up[,r] <- out.efcp$up
    out.efcp.lo[,r] <- out.efcp$lo
    cov.efcp_linear_fm_t3[i,r] <- mean(out.efcp.lo[,r] <= Y0 & Y0 <= out.efcp.up[,r])
    len.efcp_linear_fm_t3[i,r] <- mean(out.efcp.up[,r] - out.efcp.lo[,r])

    #VFCP intervals
    out.vfcp <- vfcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    out.vfcp.up[,r] <- out.vfcp$up
    out.vfcp.lo[,r] <- out.vfcp$lo
    cov.vfcp_linear_fm_t3[i,r] <- mean(out.vfcp.lo[,r] <= Y0 & Y0 <= out.vfcp.up[,r])
    len.vfcp_linear_fm_t3[i,r] <- mean(out.vfcp.up[,r] - out.vfcp.lo[,r])

    #naive intervals
    out.naive <- naive.fun(X, Y, X0, alpha = alpha)
    out.naive.up[,r] <- out.naive$up
    out.naive.lo[,r] <- out.naive$lo
    cov.naive_linear_fm_t3[i,r] <- mean(out.naive.lo[,r] <= Y0 & Y0 <= out.naive.up[,r])
    len.naive_linear_fm_t3[i,r] <- mean(out.naive.up[,r] - out.naive.lo[,r])

    #CV* intervals (three splits)
    out.star <- star.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    out.star.up[,r] <- out.star$up
    out.star.lo[,r] <- out.star$lo
    cov.star_linear_fm_t3[i,r] <- mean(out.star.lo[,r] <= Y0 & Y0 <= out.star.up[,r])
    len.star_linear_fm_t3[i,r] <- mean(out.star.up[,r] - out.star.lo[,r])

    #5-fold CV intervals (two splits)
    out.cv5 <- cv.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha, nfolds = 5)
    out.cv5.up[,r] <- out.cv5$up
    out.cv5.lo[,r] <- out.cv5$lo
    cov.cv5_linear_fm_t3[i,r] <- mean(out.cv5.lo[,r] <= Y0 & Y0 <= out.cv5.up[,r])
    len.cv5_linear_fm_t3[i,r] <- mean(out.cv5.up[,r] - out.cv5.lo[,r])
  }
}
ridge_linear_len100_t3 <- list(len.param_linear_fm_t3, len.naive_linear_fm_t3, len.vfcp_linear_fm_t3, len.star_linear_fm_t3, len.cv5_linear_fm_t3, len.efcp_linear_fm_t3)
save(ridge_linear_len100_t3, file = "ridge_linear_len100_t3.rda")
ridge_linear_cov100_t3 <- list(dim, cov.param_linear_fm_t3, cov.naive_linear_fm_t3, cov.vfcp_linear_fm_t3, cov.star_linear_fm_t3, cov.cv5_linear_fm_t3, cov.efcp_linear_fm_t3)
save(ridge_linear_cov100_t3, file = "ridge_linear_cov100_t3.rda")
Similarly, we repeat the same simulation with 5 degrees of freedom (code omitted).
data(ridge_linear_cov100_t3, package = "ConformalSmallest")
data(ridge_linear_len100_t3, package = "ConformalSmallest")
dim <- ridge_linear_cov100_t3[[1]]
cov.param_linear_fm_t3 <- ridge_linear_cov100_t3[[2]]
cov.naive_linear_fm_t3 <- ridge_linear_cov100_t3[[3]]
cov.vfcp_linear_fm_t3 <- ridge_linear_cov100_t3[[4]]
cov.star_linear_fm_t3 <- ridge_linear_cov100_t3[[5]]
cov.cv5_linear_fm_t3 <- ridge_linear_cov100_t3[[6]]
cov.efcp_linear_fm_t3 <- ridge_linear_cov100_t3[[7]]

len.param_linear_fm_t3 <- ridge_linear_len100_t3[[1]]
len.naive_linear_fm_t3 <- ridge_linear_len100_t3[[2]]
len.vfcp_linear_fm_t3 <- ridge_linear_len100_t3[[3]]
len.star_linear_fm_t3 <- ridge_linear_len100_t3[[4]]
len.cv5_linear_fm_t3 <- ridge_linear_len100_t3[[5]]
len.efcp_linear_fm_t3 <- ridge_linear_len100_t3[[6]]

len.param <- len.param_linear_fm_t3 / len.vfcp_linear_fm_t3
len.naive <- len.naive_linear_fm_t3 / len.vfcp_linear_fm_t3
len.star <- len.star_linear_fm_t3 / len.vfcp_linear_fm_t3
len.cv5 <- len.cv5_linear_fm_t3 / len.vfcp_linear_fm_t3
len.efcp <- len.efcp_linear_fm_t3 / len.vfcp_linear_fm_t3
len.vfcp <- len.vfcp_linear_fm_t3 / len.vfcp_linear_fm_t3

df.cov3 <- data.frame(dim, apply(cov.param_linear_fm_t3, 2, mean), apply(cov.naive_linear_fm_t3, 2, mean), apply(cov.vfcp_linear_fm_t3, 2, mean), apply(cov.star_linear_fm_t3, 2, mean), apply(cov.cv5_linear_fm_t3, 2, mean), apply(cov.efcp_linear_fm_t3, 2, mean))
df.cov3_sd <- data.frame(dim, apply(cov.param_linear_fm_t3, 2, sd), apply(cov.naive_linear_fm_t3, 2, sd), apply(cov.vfcp_linear_fm_t3, 2, sd), apply(cov.star_linear_fm_t3, 2, sd), apply(cov.cv5_linear_fm_t3, 2, sd), apply(cov.efcp_linear_fm_t3, 2, sd)) / sqrt(nrow(len.param))

df.len3 <- data.frame(dim, apply(len.param, 2, mean), apply(len.naive, 2, mean), apply(len.vfcp, 2, mean), apply(len.star, 2, mean), apply(len.cv5, 2, mean), apply(len.efcp, 2, mean))
df.len3_sd <- data.frame(dim, apply(len.param, 2, sd), apply(len.naive, 2, sd), apply(len.vfcp, 2, sd), apply(len.star, 2, sd), apply(len.cv5, 2, sd), apply(len.efcp, 2, sd)) / sqrt(nrow(len.param))
data(ridge_linear_cov100_t5, package = "ConformalSmallest")
data(ridge_linear_len100_t5, package = "ConformalSmallest")
dim <- ridge_linear_cov100_t5[[1]]
cov.param_linear_fm_t5 <- ridge_linear_cov100_t5[[2]]
cov.naive_linear_fm_t5 <- ridge_linear_cov100_t5[[3]]
cov.vfcp_linear_fm_t5 <- ridge_linear_cov100_t5[[4]]
cov.star_linear_fm_t5 <- ridge_linear_cov100_t5[[5]]
cov.cv5_linear_fm_t5 <- ridge_linear_cov100_t5[[6]]
cov.efcp_linear_fm_t5 <- ridge_linear_cov100_t5[[7]]

len.param_linear_fm_t5 <- ridge_linear_len100_t5[[1]]
len.naive_linear_fm_t5 <- ridge_linear_len100_t5[[2]]
len.vfcp_linear_fm_t5 <- ridge_linear_len100_t5[[3]]
len.star_linear_fm_t5 <- ridge_linear_len100_t5[[4]]
len.cv5_linear_fm_t5 <- ridge_linear_len100_t5[[5]]
len.efcp_linear_fm_t5 <- ridge_linear_len100_t5[[6]]

len.param <- len.param_linear_fm_t5 / len.vfcp_linear_fm_t5
len.naive <- len.naive_linear_fm_t5 / len.vfcp_linear_fm_t5
len.star <- len.star_linear_fm_t5 / len.vfcp_linear_fm_t5
len.cv5 <- len.cv5_linear_fm_t5 / len.vfcp_linear_fm_t5
len.efcp <- len.efcp_linear_fm_t5 / len.vfcp_linear_fm_t5
len.vfcp <- len.vfcp_linear_fm_t5 / len.vfcp_linear_fm_t5

df.cov5 <- data.frame(dim, apply(cov.param_linear_fm_t5, 2, mean), apply(cov.naive_linear_fm_t5, 2, mean), apply(cov.vfcp_linear_fm_t5, 2, mean), apply(cov.star_linear_fm_t5, 2, mean), apply(cov.cv5_linear_fm_t5, 2, mean), apply(cov.efcp_linear_fm_t5, 2, mean))
df.cov5_sd <- data.frame(dim, apply(cov.param_linear_fm_t5, 2, sd), apply(cov.naive_linear_fm_t5, 2, sd), apply(cov.vfcp_linear_fm_t5, 2, sd), apply(cov.star_linear_fm_t5, 2, sd), apply(cov.cv5_linear_fm_t5, 2, sd), apply(cov.efcp_linear_fm_t5, 2, sd)) / sqrt(nrow(len.param))

df.len5 <- data.frame(dim, apply(len.param, 2, mean), apply(len.naive, 2, mean), apply(len.vfcp, 2, mean), apply(len.star, 2, mean), apply(len.cv5, 2, mean), apply(len.efcp, 2, mean))
df.len5_sd <- data.frame(dim, apply(len.param, 2, sd), apply(len.naive, 2, sd), apply(len.vfcp, 2, sd), apply(len.star, 2, sd), apply(len.cv5, 2, sd), apply(len.efcp, 2, sd)) / sqrt(nrow(len.param))
bgnd <- theme_get()$panel.background$fill
names <- c("Linear", "Naive", "VFCP", "CV*", "CV-5-fold", "EFCP")
#colors_manual=c("blue","#FF9933","#999000","66FFCC","#66CC99","#9999FF","#FF00CC")
colors_manual <- c("red", "slategrey", "darkorchid3", "dodgerblue4", "turquoise3", "grey23")
shape_manual <- c(0, 1, 2, 3, 4, 5)
seq <- seq(2, 60, by = 2)

col_names <- c("dim", "cov.param", "cov.naive", "cov.vfcp", "cov.star", "cov.cv5", "cov.efcp")
names(df.cov3) <- names(df.cov5) <- names(df.cov3_sd) <- names(df.cov5_sd) <- col_names
col_names <- c("dim", "len.param", "len.naive", "len.vfcp", "len.star", "len.cv5", "len.efcp")
names(df.len3) <- names(df.len5) <- names(df.len3_sd) <- names(df.len5_sd) <- col_names

df.cov <- rbind(df.cov3[seq,], df.cov5[seq,])
df.cov_sd <- rbind(df.cov3_sd[seq,], df.cov5_sd[seq,])
df.len <- rbind(df.len3[seq,], df.len5[seq,])
df.len_sd <- rbind(df.len3_sd[seq,], df.len5_sd[seq,])

Moment <- c("3rd moment", "5th moment")
df_names <- expand.grid(seq, Moment, names)

cov_vec <- as.vector(as.matrix(df.cov[,-1]))
cov_sd_vec <- as.vector(as.matrix(df.cov_sd[,-1]))
cov <- cbind(df_names[,1], cov_vec, cov_sd_vec, df_names[,-1])

len_vec <- as.vector(as.matrix(df.len[,-1]))
len_sd_vec <- as.vector(as.matrix(df.len_sd[,-1]))
len <- cbind(df_names[,1], len_vec, len_sd_vec, df_names[,-1])

cov$Var <- "Coverage"
len$Var <- "Width ratio"

colnames(cov) <- c("V1","V2","sd","Moment","Method","Var")
colnames(len) <- c("V1","V2","sd","Moment","Method","Var")

data_linear_fm <- rbind(cov, len)
dummy_coverage <- data.frame(Var = c("Coverage"), Z = 0.9)

ggplot_linear_fm <- ggplot(data = data_linear_fm, aes(x = V1, y = V2, group = Method, color = Method)) +
geom_errorbar(aes(ymin = V2 - sd, ymax = V2 + sd), width = .1) +
geom_line(size = 0.8, aes(color=Method))+ #,linetype=Method)) +
#geom_point(size=5, colour=bgnd)+
#geom_point(aes(shape=Method,color=Method)) +
theme(#axis.text.y = element_blank(),
#axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
#plot.title = element_text(size = 8,hjust=0.5),
#axis.text = element_text(size = 10),
#axis.title =element_text(size = 10),
legend.position="top") +
facet_grid(Var~Moment,scales = "free")+
#scale_shape_manual(values=shape_manual) +
scale_color_manual(values=colors_manual) +
#ylim(0,1.2) + #xlab("dim") + ylab("Avg-Len")
xlab("Dimension")+guides(shape=FALSE)+
theme(strip.text.x = element_text(size = 12, face = "bold"), strip.text.y = element_text(size=12, face="bold"), axis.ticks.length=unit(.25, "cm"),axis.title=element_text(size=12))+ geom_hline(data=dummy_coverage, aes(yintercept=Z), linetype="dashed")
ggplot_linear_fm
library(repr)
library(glmnet)
#> Loading required package: Matrix
#> Loaded glmnet 4.0-2
library(MASS)
library(mvtnorm)
=paste("linear_fm_t3",sep="")
nameset.seed(2021)
<- 3 #degrees of freedom
df <- 1 #number of dimensions
l <- 100
l.lambda <- seq(0,200,l=l.lambda)
lambda_seq <- 5
dim <- 0.1
alpha <- 200 #number of training samples
n <- 100 #number of prediction points
n0 <- 100 #number of independent trials
nrep <- 0.5
rho
<- lambda.vfcp <- lambda.star <- lambda.cv5 <- rep(0,nrep)
lambda.efcp
for(i in 1:nrep){
  for (r in 1:l){
    d <- dim[r]
    set.seed(i)
    Sigma <- matrix(rho, d, d)
    diag(Sigma) <- rep(1, d)
    X <- rmvt(n, Sigma, df)  #multivariate t distribution
    beta <- rep(1:5, d/5)
    eps <- rt(n, df) * (1 + sqrt(X[,1]^2 + X[,2]^2))
    Y <- X %*% beta + eps

    X0 <- rmvt(n0, Sigma, df)
    eps0 <- rt(n0, df) * (1 + sqrt(X0[,1]^2 + X0[,2]^2))
    Y0 <- X0 %*% beta + eps0

    out.efcp <- efcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.efcp[i] <- out.efcp$lambda

    out.vfcp <- vfcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.vfcp[i] <- out.vfcp$lambda

    out.star <- star.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.star[i] <- out.star$lambda

    out.cv5 <- cv.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha, nfolds = 5)
    lambda.cv5[i] <- out.cv5$lambda
  }
}
#options(repr.plot.width=18, repr.plot.height=6)
#par(mar=c(1,1,1,1))
oldpar <- par(mfrow = c(2,2))
hist(lambda.cv5,breaks=seq(min(lambda.cv5)-1,max(lambda.cv5)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-2splits") #make x,y, values, main larger
hist(lambda.star,breaks=seq(min(lambda.star)-1,max(lambda.star)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-3splits")
hist(lambda.efcp,breaks=seq(min(lambda.efcp)-1,max(lambda.efcp)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="EFCP")
hist(lambda.vfcp,breaks=seq(min(lambda.vfcp)-1,max(lambda.vfcp)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="VFCP")
The above plots show that all four methods mostly choose \(\lambda\) close to zero, which is expected: the underlying data generating mechanism is linear and the dimension (d = 5) is small relative to the sample size, so little regularization is needed.
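As a quick numerical complement to the histograms, the selected penalties can also be summarized directly. The sketch below only assumes the vectors lambda.cv5, lambda.star, lambda.efcp, and lambda.vfcp computed above; the exact numbers depend on the seeds used.

#Summarize the selected penalties (median and 90th percentile per method)
sapply(list("CV-2splits" = lambda.cv5, "CV-3splits" = lambda.star,
            "EFCP" = lambda.efcp, "VFCP" = lambda.vfcp),
       quantile, probs = c(0.5, 0.9))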
=paste("nonlinear_fm_t3",sep="")
nameset.seed(2021)
<- 3 #degrees of freedom
df <- 1 #number of dimensions
l <- 100
l.lambda <- seq(0,200,l=l.lambda)
lambda_seq <- 5
dim <- 0.1
alpha <- 200 #number of training samples
n <- 100 #number of prediction points
n0 <- 100 #number of independent trials
nrep <- 0.5
rho
<- lambda.vfcp <- lambda.star <- lambda.cv5 <- rep(0,nrep)
lambda.efcp
for(i in 1:nrep){
  for (r in 1:l){
    d <- dim[r]
    set.seed(i)
    Sigma <- matrix(rho, d, d)
    diag(Sigma) <- rep(1, d)
    X <- rmvt(n, Sigma, df)  #multivariate t distribution

    eps1 <- rt(n, df) * (1 + sqrt(X[,1]^2 + X[,2]^2))
    eps2 <- rt(n, df) * (1 + sqrt(X[,1]^4 + X[,2]^4))
    Y <- rpois(n, sin(X[,1])^2 + cos(X[,2])^4 + 0.01) + 0.03*X[,1]*eps1 + 25*(runif(n,0,1) < 0.01)*eps2

    X0 <- rmvt(n0, Sigma, df)
    eps01 <- rt(n0, df) * (1 + sqrt(X0[,1]^2 + X0[,2]^2))
    eps02 <- rt(n0, df) * (1 + sqrt(X0[,1]^4 + X0[,2]^4))
    Y0 <- rpois(n0, sin(X0[,1])^2 + cos(X0[,2])^4 + 0.01) + 0.03*X0[,1]*eps01 + 25*(runif(n0,0,1) < 0.01)*eps02

    out.efcp <- efcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.efcp[i] <- out.efcp$lambda

    out.vfcp <- vfcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.vfcp[i] <- out.vfcp$lambda

    out.star <- star.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.star[i] <- out.star$lambda

    out.cv5 <- cv.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha, nfolds = 5)
    lambda.cv5[i] <- out.cv5$lambda
  }
}
#options(repr.plot.width=18, repr.plot.height=6)
oldpar <- par(mfrow = c(2,2))
#par(mar=c(1,1,1,1))
hist(lambda.cv5,breaks=seq(min(lambda.cv5)-1,max(lambda.cv5)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-2splits") #make x,y, values, main larger
hist(lambda.star,breaks=seq(min(lambda.star)-1,max(lambda.star)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-3splits")
hist(lambda.efcp,breaks=seq(min(lambda.efcp)-1,max(lambda.efcp)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="EFCP")
hist(lambda.vfcp,breaks=seq(min(lambda.vfcp)-1,max(lambda.vfcp)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="VFCP")
par(oldpar)
The above plots show that all four methods mostly choose the penalty parameter \(\lambda\) to be as large as possible, which is also expected: the underlying data generating mechanism is nonlinear, so the linear ridge fit carries little signal and heavy shrinkage is preferred.
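To check this numerically, the sketch below (assuming, as above, that the selection functions return a value from the supplied lambda_seq grid) reports how often each method picks the largest penalty in the grid:

#Fraction of trials in which each method selects the largest penalty in the grid
max_lambda <- max(lambda_seq)
sapply(list("CV-2splits" = lambda.cv5, "CV-3splits" = lambda.star,
            "EFCP" = lambda.efcp, "VFCP" = lambda.vfcp),
       function(lam) mean(lam >= max_lambda - 1e-8))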