The following vignette contains code to accompany the paper “Markov unchained: a guided walk through the Metropolis algorithm.” The code walks the reader through
A case control study of leukemia (y=1) and residence around strong magnetic fields (x=1)
y = c(rep(1, 36), rep(0, 198)) # leukemia cases
x = c(rep(1, 3), rep(0, 33), rep(1, 5), rep(0, 193)) # exposure
These functions will be used throughout
#helper functions
expit <- function(mu) 1/(1+exp(-mu))
loglik = function(y,x,beta){
# calculate the log likelihood
lli = dbinom(y, 1, expit(beta[1] + x*beta[2]), log=TRUE)
sum(lli)
}
riskdifference = function(y,x,beta){
# baseline odds (offset)
# calculate a risk difference
poprisk = 4.8/100000
popodds = poprisk/(1-poprisk)
studyodds = mean(y)/(1-mean(y))
r1 = expit(log(popodds/studyodds) + beta[1] + beta[2])
r0 = expit(log(popodds/studyodds) + beta[1])
mean(r1-r0)
}
data = data.frame(leuk=y, magfield=x)
mod = glm(leuk ~ magfield, family=binomial(), data=data)
summary(mod)$coefficients
beta1 = summary(mod)$coefficients[2,1]
se1 = summary(mod)$coefficients[2,2]
cat("\n\nMaximum likelihood beta coefficient (95% CI)\n")
round(c(beta=beta1, ll=beta1+se1*qnorm(0.025), ul=beta1+se1*qnorm(0.975)), 2)
cat("\n\nMaximum likelihood odds ratio (95% CI)\n")
round(exp(c(beta=beta1, ll=beta1+se1*qnorm(0.025), ul=beta1+se1*qnorm(0.975))), 2)
cat("\n\nMaximum likelihood risk difference (multiplied by 1000) \n")
round(c(rd_1000=riskdifference(y,x,mod$coefficients)*1000), 2)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.766183 0.188373 -9.375988 6.853094e-21
## magfield 1.255357 0.754200 1.664488 9.601492e-02
##
##
## Maximum likelihood beta coefficient (95% CI)
## beta ll ul
## 1.26 -0.22 2.73
##
##
## Maximum likelihood odds ratio (95% CI)
## beta ll ul
## 3.51 0.80 15.39
##
##
## Maximum likelihood risk difference (multiplied by 1000)
## rd_1000
## 0.11
# initialize
M=10000
set.seed(91828)
beta_post = matrix(nrow=M, ncol=2)
colnames(beta_post) = c('beta0', 'beta1')
accept = numeric(M)
rd = numeric(M)
beta_post[1,] = c(2,-3)
rd[1] = riskdifference(y,x,beta_post[1,])
accept[1] = 1
for(i in 2:M){
oldb = beta_post[i-1,]
prop = rnorm(2, sd=0.2)
newb = oldb+prop
num = loglik(y,x,newb)
den = loglik(y,x,oldb)
acceptprob = exp(num-den)
acc = (acceptprob > runif(1))
if(acc){
beta_post[i,] = newb
accept[i] = 1
}else{
beta_post[i,] = oldb
accept[i] = 0
}
rd[i] = 1000*riskdifference(y,x,beta_post[i,])
}
mean(accept)
## [1] 0.6551
summary(beta_post)
## beta0 beta1
## Min. :-2.518 Min. :-3.9483
## 1st Qu.:-1.902 1st Qu.: 0.7389
## Median :-1.776 Median : 1.2292
## Mean :-1.770 Mean : 1.1714
## 3rd Qu.:-1.651 3rd Qu.: 1.7004
## Max. : 2.000 Max. : 3.9189
init = beta_post[1,]
postmean = apply(beta_post[-c(1:1000),], 2, mean)
cat("Posterior mean\n")
## Posterior mean
round(postmean, 2)
## beta0 beta1
## -1.78 1.22
plot(beta_post, pch=19, col=rgb(0,0,0,0.05), xlab=expression(beta[0]), ylab=expression(beta[1]), xlim=c(-2.5,2.5), ylim=c(-4.5,4.5))
points(init[1], init[2], col="red", pch=19)
points(postmean[1], postmean[2], col="orange", pch=19)
legend("topright", col=c("red", "orange"), legend=c("Initial value", "Post. mean"), pch=19)
plot(beta_post[,2], type='l', ylab=expression(beta[1]), xlab="Iteration", ylim=c(-4, 4))
plot(rd, type='l', ylab="RD*1000", xlab="Iteration", ylim=c(-4, 4))
plot(density(beta_post[-c(1:1000),2]), xlab=expression(beta[1]), ylab="Density", main="")
plot(density(rd[-c(1:1000)]), xlab="RD*1000", ylab="Density", main="")
# initialize
M=10000
set.seed(91828)
beta_post_guide = matrix(nrow=M, ncol=2)
colnames(beta_post_guide) = c('beta0', 'beta1')
accept = numeric(M)
rd_guide = numeric(M)
beta_post_guide[1,] = c(2,-3)
rd_guide[1] = riskdifference(y,x,beta_post_guide[1,])
accept[1] = 1
dir = 1
for(i in 2:M){
oldb = beta_post_guide[i-1,]
prop = dir*abs(rnorm(2, sd=0.2))
newb = oldb+prop
num = loglik(y,x,newb)
den = loglik(y,x,oldb)
acceptprob = exp(num-den)
acc = (acceptprob > runif(1))
if(acc){
beta_post_guide[i,] = newb
accept[i] = 1
}else{
beta_post_guide[i,] = oldb
accept[i] = 0
dir = dir*-1
}
rd_guide[i] = 1000*riskdifference(y,x,beta_post_guide[i,])
}
postmean = apply(beta_post_guide[-c(1:1000),], 2, mean)
cat("Posterior mean, guided\n")
## Posterior mean, guided
round(postmean, 2)
## beta0 beta1
## -1.80 1.41
col1 = rgb(0,0,0,.5)
col2 = rgb(1,0,0,.35)
par(mfcol=c(1,2))
#trace plots
plot(beta_post[1:200,2], type='l', ylab=expression(beta[1]), xlab="Iteration", ylim=c(-4, 4), col=col1)
lines(beta_post_guide[1:200,2], col=col2)
legend("topright", lty=1, col=c(col1, col2), legend=c("Rand. walk", "Guided"))
plot(9800:10000, beta_post[9800:10000,2], type='l', ylab=expression(beta[1]), xlab="Iteration", ylim=c(-4, 4), col=col1)
lines(9800:10000, beta_post_guide[9800:10000,2], col=col2)
legend("topright", lty=1, col=c(col1, col2), legend=c("Rand. walk", "Guided"))
# density plots
plot(density(beta_post_guide[-c(1:1000),2]), col=col2, xlab=expression(beta[1]), ylab="Density", main="")
lines(density(beta_post[-c(1:1000),2]), col=col1)
legend("bottomright", lty=1, col=c(col1, col2), legend=c("Rand. walk", "Guided"))
plot(density(rd_guide[-c(1:1000)]), xlab="RD*1000", ylab="Density", main="", col=col2)
lines(density(rd[-c(1:1000)]), col=col1)
legend("topright", lty=1, col=c(col1, col2), legend=c("Rand. walk", "Guided"))
par(mfcol=c(1,1))
# initialize
M=10000
burnin=1000
set.seed(91828)
beta_post_adaptguide = matrix(nrow=M+burnin, ncol=2)
colnames(beta_post_adaptguide) = c('beta0', 'beta1')
accept = numeric(M+burnin)
rd_adaptguide = numeric(M+burnin)
beta_post_adaptguide[1,] = c(2,-3)
rd_adaptguide[1] = riskdifference(y,x,beta_post[1,])
accept[1] = 1
prop.sigma = c(0.2, 0.2)
dir = 1
for(i in 2:(M+burnin)){
if((i < burnin) & (i > 25)){
prop.sigma = apply(beta_post_adaptguide[max(1, i-100):(i-1),], 2, sd)
}
oldb = beta_post_adaptguide[i-1,]
prop = dir*abs(rnorm(2, sd=prop.sigma))
newb = oldb+prop
num = loglik(y,x,newb)
den = loglik(y,x,oldb)
acceptprob = exp(num-den)
acc = (acceptprob > runif(1))
if(acc){
beta_post_adaptguide[i,] = newb
accept[i] = 1
}else{
beta_post_adaptguide[i,] = oldb
accept[i] = 0
dir = dir*-1
}
rd_adaptguide[i] = 1000*riskdifference(y,x,beta_post_adaptguide[i,])
}
postmean = apply(beta_post_adaptguide[-c(1:1000),], 2, mean)
cat("Posterior mean, guided and adaptive\n")
## Posterior mean, guided and adaptive
round(postmean, 2)
## beta0 beta1
## -1.78 1.22
col1 = rgb(0,0,0,.5)
col2 = rgb(1,0,0,.35)
par(mfcol=c(1,2))
#trace plots
plot(beta_post[1:200,2], type='l', ylab=expression(beta[1]), xlab="Iteration", ylim=c(-4, 4), col=col1)
lines(beta_post_adaptguide[1:200,2], col=col2)
legend("topright", lty=1, col=c(col1, col2), legend=c("Rand. walk", "Guided, adaptive"))
plot(9800:10000, beta_post[9800:10000,2], type='l', ylab=expression(beta[1]), xlab="Iteration", ylim=c(-4, 4), col=col1)
lines(9800:10000, beta_post_adaptguide[9800:10000,2], col=col2)
legend("topright", lty=1, col=c(col1, col2), legend=c("Rand. walk", "Guided, adaptive"))
# density plots
plot(density(beta_post_adaptguide[-c(1:1000),2]), col=col2, xlab=expression(beta[1]), ylab="Density", main="")
lines(density(beta_post[-c(1:1000),2]), col=col1)
legend("bottomright", lty=1, col=c(col1, col2), legend=c("Rand. walk", "Guided, adaptive"))
plot(density(rd_adaptguide[-c(1:1000)]), xlab="RD*1000", ylab="Density", main="", col=col2)
lines(density(rd[-c(1:1000)]), col=col1)
legend("topright", lty=1, col=c(col1, col2), legend=c("Rand. walk", "Guided, adaptive"))
par(mfcol=c(1,1))
# initialize
M=10000
burnin=1000
set.seed(91828)
beta_post_adaptguide2 = matrix(nrow=M+burnin, ncol=2)
colnames(beta_post_adaptguide2) = c('beta0', 'beta1')
accept = numeric(M+burnin)
rd_adaptguide2 = numeric(M+burnin)
beta_post_adaptguide2[1,] = c(2,-3)
rd_adaptguide2[1] = riskdifference(y,x,beta_post[1,])
accept[1] = 1
prop.sigma = c(0.2, 0.2)
dir = 1
for(i in 2:(M+burnin)){
if((i < burnin) & (i > 25)){
prop.sigma = apply(beta_post_adaptguide2[max(1, i-100):(i-1),], 2, sd)
}
oldb = beta_post_adaptguide2[i-1,]
prop = dir*abs(rnorm(2, sd=prop.sigma))
newb = oldb+prop
num = loglik(y,x,newb) + dnorm(newb[1], mean=0, sd=sqrt(100), log=TRUE) + dnorm(newb[2], mean=0, sd=sqrt(0.5), log=TRUE)
den = loglik(y,x,oldb) + dnorm(oldb[1], mean=0, sd=sqrt(100), log=TRUE) + dnorm(oldb[2], mean=0, sd=sqrt(0.5), log=TRUE)
acceptprob = exp(num-den)
acc = (acceptprob > runif(1))
if(acc){
beta_post_adaptguide2[i,] = newb
accept[i] = 1
}else{
beta_post_adaptguide2[i,] = oldb
accept[i] = 0
dir = dir*-1
}
rd_adaptguide2[i] = 1000*riskdifference(y,x,beta_post_adaptguide2[i,])
}
postmean = apply(beta_post_adaptguide2[-c(1:1000),], 2, mean)
cat("Posterior mean, guided and adaptive\n")
## Posterior mean, guided and adaptive
round(postmean, 2)
## beta0 beta1
## -1.75 0.54
mean(accept)
## [1] 0.5552727
init = beta_post_adaptguide[1,]
postmean = apply(beta_post_adaptguide[-c(1:1000),], 2, mean)
cat("Posterior mean, uniform priors\n")
## Posterior mean, uniform priors
round(postmean, 2)
## beta0 beta1
## -1.78 1.22
init2 = beta_post_adaptguide2[1,]
postmean2 = apply(beta_post_adaptguide2[-c(1:1000),], 2, mean)
cat("Posterior mean, informative normal priors\n")
## Posterior mean, informative normal priors
round(postmean2, 2)
## beta0 beta1
## -1.75 0.54
par(mfcol=c(1,2))
plot(beta_post_adaptguide, pch=19, col=rgb(0,0,0,0.05), xlab=expression(beta[0]), ylab=expression(beta[1]), xlim=c(-2.5,2.5), ylim=c(-4.5,4.5), main="Uniform priors")
points(init[1], init[2], col="red", pch=19)
points(postmean[1], postmean[2], col="orange", pch=19)
legend("topright", col=c("red", "orange"), legend=c("Initial value", "Post. mean"), pch=19)
plot(beta_post_adaptguide2, pch=19, col=rgb(0,0,0,0.05), xlab=expression(beta[0]), ylab=expression(beta[1]), xlim=c(-2.5,2.5), ylim=c(-4.5,4.5), main="Informative priors")
points(init2[1], init2[2], col="red", pch=19)
points(postmean2[1], postmean2[2], col="orange", pch=19)
legend("topright", col=c("red", "orange"), legend=c("Initial value", "Post. mean"), pch=19)
par(mfcol=c(1,2))
plot(beta_post_adaptguide[,2], type='l', ylab=expression(beta[1]), xlab="Iteration", ylim=c(-4, 4))
plot(beta_post_adaptguide2[,2], type='l', ylab=expression(beta[1]), xlab="Iteration", ylim=c(-4, 4))
plot(density(beta_post_adaptguide[-c(1:1000),2]), xlab=expression(beta[1]), ylab="Density", main="", xlim=c(-4, 4))
plot(density(beta_post_adaptguide2[-c(1:1000), 2]), xlab=expression(beta[1]), ylab="Density", main="", xlim=c(-4, 4))
par(mfcol=c(2,1))
plot(rd_adaptguide, type='l', ylab="RD*1000", xlab="Iteration", ylim=c(-.2, .5))
plot(rd_adaptguide2, type='l', ylab="RD*1000", xlab="Iteration", ylim=c(-.2, .5))
plot(density(rd_adaptguide[-c(1:1000)]), xlab=expression(beta[1]), ylab="Density", main="", xlim=c(-.2, .5))
plot(density(rd_adaptguide2[-c(1:1000)]), xlab="RD*1000", ylab="Density", main="", xlim=c(-.2, .5))
par(mfcol=c(1,1))
Given what you know, running the R package function metropolis_glm
should
be fairly straightforward. The following example calls in the case-control data used above
and compares a randome Walk metropolis algorithmn (with N(0, 0.05), N(0, 0.1) proposal distribution) with a guided, adaptive algorithm.
library(metropolis)
## Loading required package: coda
##
## Attaching package: 'metropolis'
## The following object is masked _by_ '.GlobalEnv':
##
## expit
data("magfields", package="metropolis")
# random walk
res.rw = metropolis_glm(y ~ x, data=magfields, family=binomial(), iter=20000, burnin=3000, adapt=FALSE, guided=FALSE, block=TRUE, inits=c(2,-3), control = metropolis.control(prop.sigma.start = c(0.05, .1)))
summary(res.rw, keepburn = FALSE)
## $nsamples
## [1] 20000
##
## $sd
## (Intercept) x
## 0.1870940 0.8565178
##
## $se
## (Intercept) x
## 0.0109079 0.1120727
##
## $ESS_parms
## [1] 294.19637 58.40808
##
## $postmean
## mean normal_lci normal_uci
## (Intercept) -1.778688 -2.1453924 -1.411984
## x 1.240300 -0.4384751 2.919075
##
## $postmedian
## median pctl_lci pctl_uci
## (Intercept) -1.774736 -2.1594707 -1.413966
## x 1.260167 -0.5570205 2.934671
##
## $postmode
## mode hpd_lci hpd_uci
## (Intercept) -1.766332 -2.1746278 -1.431942
## x 1.257571 -0.5541435 2.938285
plot(res.rw, par = 1:2, keepburn=TRUE)
# guided, adaptive
res.ga = metropolis_glm(y ~ x, data=magfields, family=binomial(), iter=20000, burnin=3000, adapt=TRUE, guided=TRUE, block=FALSE, inits=c(2,-3))
summary(res.ga, keepburn = FALSE)
## $nsamples
## [1] 20000
##
## $sd
## (Intercept) x
## 0.1887433 0.8039617
##
## $se
## (Intercept) x
## 0.002139775 0.009397736
##
## $ESS_parms
## [1] 7780.489 7318.537
##
## $postmean
## mean normal_lci normal_uci
## (Intercept) -1.783400 -2.1533369 -1.413463
## x 1.195689 -0.3800755 2.771454
##
## $postmedian
## median pctl_lci pctl_uci
## (Intercept) -1.779390 -2.1692698 -1.429546
## x 1.204767 -0.4308701 2.719736
##
## $postmode
## mode hpd_lci hpd_uci
## (Intercept) -1.764664 -2.1311923 -1.396892
## x 1.246514 -0.4152461 2.725762
plot(res.ga, par = 1:2, keepburn=TRUE)
Finally, we can use the “glm” option in initial values to initialize the chain with the MLE estimates. This can eke out slightly more efficiency and allow reduced burnin.
# guided, adaptive
res.ga.init = metropolis_glm(y ~ x, data=magfields, family=binomial(), iter=20000, burnin=1000, adapt=TRUE, guided=TRUE, block=FALSE, inits="glm")
summary(res.ga.init, keepburn = FALSE)
## $nsamples
## [1] 20000
##
## $sd
## (Intercept) x
## 0.1910264 0.8117856
##
## $se
## (Intercept) x
## 0.002187349 0.009279116
##
## $ESS_parms
## [1] 7626.942 7653.666
##
## $postmean
## mean normal_lci normal_uci
## (Intercept) -1.779687 -2.1540987 -1.405275
## x 1.190642 -0.4004577 2.781742
##
## $postmedian
## median pctl_lci pctl_uci
## (Intercept) -1.774436 -2.1665633 -1.418378
## x 1.230554 -0.5138752 2.703918
##
## $postmode
## mode hpd_lci hpd_uci
## (Intercept) -1.766599 -2.1459323 -1.399196
## x 1.246608 -0.4553881 2.749118
plot(res.ga.init, par = 1:2, keepburn=TRUE)
Using the function, “risk difference” from above, we can use the output from our prior model to get Bayesian estimates of the risk diffence. In general, this is a useful way to extend MCMC simulations to new estimands that may not be directly parameterized in the model.
# guided, adaptive
beta = as.matrix(res.ga.init$parms[, c("b_0", "b_1")])
y = magfields$y
X = cbind(rep(1, dim(magfields)[1]), magfields$x)
1000*riskdifference(y,X,beta[1,])
## [1] 0.1132425
# calculate risk difference for every value of model coefficients
rd.ga.init = apply(beta, 1, function(b) 1000*riskdifference(y,X,b))
par(mfcol=c(2,1))
plot(density(rd.ga.init), xlab = "RD*1000", ylab="Kernel density", main="", xlim=c(-.2, 2.5))
plot(rd.ga.init, type='l', xlab = "RD*1000", ylab="Iteration", ylim=c(-.2, 2.5))
par(mfcol=c(1,1))
# posterior mean, median, credible intervals
mean(rd.ga.init[-c(1:1000)])
## [1] 0.1517853
quantile(rd.ga.init[-c(1:1000)], p = c(.5, .025, .975) )
## 50% 2.5% 97.5%
## 0.10763217 -0.01949789 0.59457738