JAGS (Just Another Gibbs Sampler) is a system that builds MCMC samplers. For a comprehensive overview of JAGS, see the user manual http://people.stat.sc.edu/hansont/stat740/jags_user_manual.pdf. The goal today is to install JAGS and learn a little about the basic functionality by executing code from R.
rjags
package.JAGS is designed for inference on Bayesian models using Markov Chain Monte Carlo (MCMC) simulation. Running a model refers to generating samples from the posterior distribution of the model parameters. This takes place in five steps:
The next stages of analysis are done outside of JAGS: convergence diagnostics, model criticism, and summarizing the samples must be done using other packages more suited to this task. We will use the coda
package to assess MCMC samples.
## JAGS lab - STAT 532
require(rjags) # Must have previously installed package rjags.
## Loading required package: rjags
## Loading required package: coda
## Linked to JAGS 4.3.0
## Loaded modules: basemod,bugs
# Simulate the data from binomial distribution:
set.seed(09282017)
n <- 50
theta.true <- .3
y.sim <- rbinom(n, 1, theta.true)
dataList = list( # Put the information into a list.
y = y.sim ,
Ntotal = n
)
# Define the model as a string object:
modelString = "
model {
for ( i in 1:Ntotal ) {
y[i] ~ dbern( theta )
}
theta ~ dbeta( 1 , 1 )
}
"
writeLines( modelString , con="TEMPmodel.txt" ) #creates a text file object
# Initialize the chains based on MLE of data.
# Option: Use function that generates random values for each chain:
initsList = function() {
resampledY = sample( y.sim , replace=TRUE )
thetaInit = sum(resampledY)/length(resampledY)
thetaInit = 0.001+0.998*thetaInit # keep away from 0,1
return( list( theta=thetaInit ) )
}
initsList = function() {
return( list( theta=runif(1) ) )
}
initsList()
## $theta
## [1] 0.7358637
initsList
function is doing.# Run the chains:
jagsModel = jags.model( file="TEMPmodel.txt" , data=dataList , inits=initsList , n.chains=3 , n.adapt=500 )
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 50
## Unobserved stochastic nodes: 1
## Total graph size: 53
##
## Initializing model
jags.model()
function.codaSamples = coda.samples( jagsModel , variable.names=c("theta") ,
n.iter=5000)
summary(codaSamples)
##
## Iterations = 1:5000
## Thinning interval = 1
## Number of chains = 3
## Sample size per chain = 5000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## 0.3265919 0.0646055 0.0005275 0.0005275
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## 0.2082 0.2813 0.3240 0.3693 0.4594
plot(codaSamples)
traceplot(codaSamples)
gelman.plot(codaSamples)
HPDinterval(codaSamples)
## [[1]]
## lower upper
## theta 0.2058177 0.4524969
## attr(,"Probability")
## [1] 0.95
##
## [[2]]
## lower upper
## theta 0.2052029 0.4595619
## attr(,"Probability")
## [1] 0.95
##
## [[3]]
## lower upper
## theta 0.2127423 0.4610159
## attr(,"Probability")
## [1] 0.95
### Normal Model
N <- 1000
mu <- 5
tau.sq <- 1/5 # precision = 1 / sigma^2
x <- rnorm(N, mu, sqrt(1/tau.sq))
# Define the model:
modelString = "
model {
for (i in 1:N) {
x[i] ~ dnorm(mu, tau.sq)
}
mu ~ dnorm(0, .0001)
tau.sq ~ dgamma(.1, .1)
}
"
writeLines( modelString , con="TEMPmodel.txt" )
Note the dollar sign can be used to embed latex commands in line, \(p(\mu) \sim\). Two dollar signs are used to signify equations.
\[p(\tau^2) \sim \]
jags <- jags.model('TEMPmodel.txt',
data = list('x' = x,
'N' = N),
n.chains = 4,
n.adapt = 100)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1000
## Unobserved stochastic nodes: 2
## Total graph size: 1006
##
## Initializing model
codaSamples = coda.samples( jags , variable.names=c("mu","tau.sq") ,
n.iter=1000)
summary(codaSamples)
##
## Iterations = 1:1000
## Thinning interval = 1
## Number of chains = 4
## Sample size per chain = 1000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## mu 4.962 0.072605 0.0011480 0.001148
## tau.sq 0.196 0.008682 0.0001373 0.000133
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## mu 4.8201 4.9136 4.9633 5.0134 5.104
## tau.sq 0.1798 0.1899 0.1958 0.2019 0.213
plot(codaSamples)
HPDinterval(codaSamples)
## [[1]]
## lower upper
## mu 4.8263264 5.1214677
## tau.sq 0.1802911 0.2127798
## attr(,"Probability")
## [1] 0.95
##
## [[2]]
## lower upper
## mu 4.8354364 5.1099165
## tau.sq 0.1780753 0.2117229
## attr(,"Probability")
## [1] 0.95
##
## [[3]]
## lower upper
## mu 4.8377865 5.117542
## tau.sq 0.1799035 0.213300
## attr(,"Probability")
## [1] 0.95
##
## [[4]]
## lower upper
## mu 4.8217917 5.1035861
## tau.sq 0.1806109 0.2129812
## attr(,"Probability")
## [1] 0.95
traceplot(codaSamples)