This package provides functions to generate ensembles of generalized linear models using a projected subset gradient descent algorithm.
You can install the stable version from CRAN.
# Install the released PSGD package together with its dependencies.
install.packages(pkgs = "PSGD", dependencies = TRUE)
You can install the development version from GitHub.
# Install the development version from the GitHub repository.
library(devtools)
devtools::install_github("AnthonyChristidis/PSGD")
# Required Libraries
library(mvnfast)
# Setting the parameters
p <- 100         # number of predictors (dimension of Sigma)
n <- 40          # number of training observations
n.test <- 2000   # number of test observations
sparsity <- 0.2  # fraction of predictors with a nonzero coefficient
rho <- 0.5       # correlation between the active predictors
SNR <- 3         # target signal-to-noise ratio
# Fix the RNG state so the coefficients generated afterwards are reproducible.
set.seed(0)
# Generating the coefficient
p.active <- floor(p * sparsity)  # number of nonzero coefficients
a <- 4 * log(n) / sqrt(n)        # lower bound on a nonzero coefficient's magnitude
neg.prob <- 0.2                  # probability that a nonzero coefficient is negative
# Each nonzero coefficient is a random sign (negative with probability
# neg.prob) times a magnitude of at least `a`.
nonzero.betas <- (-1)^(rbinom(p.active, 1, neg.prob)) *
  (a + abs(rnorm(p.active)))
# Correlation structure
# Start from a zero matrix, set every entry of the active-predictor block to
# rho, then put ones on the diagonal.
Sigma <- matrix(0, p, p)
Sigma[seq_len(p.active), seq_len(p.active)] <- rho
diag(Sigma) <- 1
# Full coefficient vector: the nonzero coefficients first, then zeros.
true.beta <- c(nonzero.betas, rep(0, p - p.active))
# Computing the noise parameter for target SNR
# The noise standard deviation is sqrt(beta' Sigma beta / SNR); as.numeric()
# drops the 1 x 1 matrix returned by the matrix product to a plain scalar.
sigma.epsilon <- as.numeric(
  sqrt((t(true.beta) %*% Sigma %*% true.beta) / SNR)
)
# Simulate some data
set.seed(1)
# Training set: multivariate normal predictors and a linear response with
# intercept 1 and Gaussian noise of standard deviation sigma.epsilon.
x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma)
y.train <- 1 + x.train %*% true.beta + rnorm(n = n, mean = 0, sd = sigma.epsilon)
# Test set generated from the same model.
x.test <- mvnfast::rmvn(n.test, mu = rep(0, p), sigma = Sigma)
y.test <- 1 + x.test %*% true.beta + rnorm(n.test, sd = sigma.epsilon)
# CV PSGD Ensemble
# cv.PSGD is namespace-qualified because this script never attaches PSGD;
# loading the namespace also makes the coef() and predict() methods available.
output <- PSGD::cv.PSGD(x = x.train, y = y.train, n_models = 5,
                        model_type = c("Linear", "Logistic")[1],
                        include_intercept = TRUE,
                        split = c(2, 3), size = c(10, 15),
                        max_iter = 20,
                        cycling_iter = 0,
                        n_folds = 5,
                        n_threads = 1)
# Coefficients and test-set predictions using every model in the ensemble.
psgd.coef <- coef(output, group_index = seq_len(output$n_models))
psgd.predictions <- predict(output, newx = x.test,
                            group_index = seq_len(output$n_models))
# Test-set mean squared prediction error, scaled by the noise variance.
mean((y.test - psgd.predictions)^2) / sigma.epsilon^2
This package is free and open source software, licensed under GPL (>= 2).