# S functions for inference from iterative simulation
# Written 1 July 1991 by Andrew Gelman, Dept. of Statistics, UC Berkeley
# Please mail all comments/questions to gelman@stat.berkeley.edu
# Software can be freely used for non-commercial purposes and freely
# distributed. Submitted to statlib on 29 April 1992.
# Derivations appear in the article, "Inference from iterative simulation,"
# by A. Gelman and D. B. Rubin, in "Statistical Science," to appear.
# How to Use These Programs
#
# Preparation: The results of m multiple sequences, each of length 2n,
# of iterative simulation (e.g., the Metropolis algorithm or the
# Gibbs sampler) used to simulate a probability distribution.
# The starting points of the simulations should be drawn from a
# distribution that is overdispersed relative to the "target"
# distribution you are trying to simulate.
#
# At each iteration, all scalar parameters or summaries of interest
# should be stored. The result will be a 2n by m matrix for each
# scalar summary.
#
# To run: Use gpar (or gpar.log or gpar.logit) to do output analysis
# on the simulations for each scalar summary of interest. For
# each scalar summary r, gpar produces two results:
#
# a. Estimates of the distribution of r,
# b. The estimated potential scale reduction;
# i.e., the factor by which the estimated t intervals for r
# might decrease if the iterative simulation is continued
# forever. As n increases, the scale reduction approaches 1.
# The following S functions are included:
# gpar (Gibbs-parallel) takes a matrix r of simulations: each of
# m columns is an independent iteratively simulated sequence of length
# 2n. The first n steps of each sequence are discarded.
# Output is a list of three vectors:
#
# post: (2.5%, 50%, 97.5%) quantiles for the target distribution
# based on the Student-t distribution
# quantiles: (2.5%, 25%, 50%, 75%, 97.5%) empirical quantiles of
# the mn simulated values.
# confshrink: 50% and 97.5% quantiles of a rough upper bound on
# how much the confidence interval of "post" will shrink
# if the iterative simulation is continued forever.
#
# If both components of confshrink are not near 1, you should
# probably run the iterative simulation further.
# gpar.log and gpar.logit are versions of gpar to be used for scalar
# summaries that are all-positive or are constrained to lie
# between 0 and 1.
#############################################################################
# SUBROUTINES
# Sample covariance of two equal-length vectors, with the n-1 divisor.
# (Masks stats::cov, but for two plain vectors the value is identical:
# sum((a - mean(a)) * (b - mean(b))) / (length(a) - 1).)
cov <- function (a, b) {
  m <- length(a)
  ((mean((a - mean(a)) * (b - mean(b)))) * m) / (m - 1)
}
# Logit transform: maps (0,1) to the real line.  Vectorized.
logit <- function (x) {log(x / (1 - x))}
# Inverse logit: maps the real line back to (0,1).  Vectorized.
invlogit <- function (x) {1 / (1 + exp(-x))}
# Column means of a matrix, returned as a 1 x ncol(mat) matrix
# (computed as a row-vector product; callers use as.vector() or rely
# on the matrix shape for elementwise arithmetic in col.vars).
col.means <- function(mat) {
  ones <- matrix(1, nrow = 1, ncol = nrow(mat))
  ones %*% mat / nrow(mat)
}
# Column variances of a matrix, returned as a 1 x ncol(mat) matrix.
# NOTE(review): this uses the 1/n divisor (E[x^2] - E[x]^2), not the
# n-1 sample-variance divisor that gpar's internal comment describes --
# behavior kept as in the original; confirm against the published method.
col.vars <- function(mat) {
  means <- col.means(mat)
  col.means(mat * mat) - means * means
}
# Chi-squared degrees of freedom estimated by the method of moments.
#
# Assume A has a gamma (scaled chi-squared) sampling distribution and
# varA is an unbiased estimate of the sampling variance of A; then the
# matched degrees of freedom are 2*A^2/varA.
chisqdf <- function (A, varA) {2 * (A^2 / varA)}
#############################################################################
# MAIN PROGRAM
# Gelman-Rubin output analysis for one scalar summary.
#
# r: a (2n x m) matrix; each of the m columns is an independent
#    iteratively simulated sequence of length 2n.  The first half of
#    each sequence is discarded as burn-in.
# Returns a list of three vectors:
#   post:       (2.5%, 50%, 97.5%) quantiles for the target distribution,
#               based on the Student-t distribution
#   quantiles:  (2.5%, 25%, 50%, 75%, 97.5%) empirical quantiles of the
#               retained mn simulated values
#   confshrink: estimate and approximate 97.5% upper bound of the
#               potential scale reduction
gpar <- function (r) {
  alpha <- .05   # 95% intervals
  m <- ncol(r)
  x <- r [(nrow(r)/2+1):nrow(r),]   # second half of simulated sequences
  n <- nrow(x)
  # We compute the following statistics:
  #
  #  xdot:  vector of sequence means
  #  s2:  vector of sequence variances
  #       NOTE(review): col.vars divides by n, not n-1, so these are
  #       population variances; the original comment claimed an (n-1)
  #       divisor -- confirm against the published algorithm.
  #  W = mean(s2): within MS
  #  B = n*var(xdot): between MS.
  #  muhat = mean(xdot): grand mean; unbiased under strong stationarity
  #  varW = var(s2)/m: estimated sampling var of W
  #  varB = B^2 * 2/(m-1): estimated sampling var of B
  #  covWB = (n/m)*(cov(s2,xdot^2) - 2*muhat*cov(s2,xdot)):
  #          estimated sampling cov(W,B)
  #  sig2hat = ((n-1)/n)*W + (1/n)*B: estimate of sig2; unbiased under
  #            strong stationarity
  #  quantiles: empirical quantiles from last half of simulated sequences
  xdot <- as.vector(col.means(x))
  s2 <- as.vector(col.vars(x))
  W <- mean(s2)
  B <- n*var(xdot)
  muhat <- mean(xdot)
  varW <- var(s2)/m
  varB <- B^2 * 2/(m-1)
  covWB <- (n/m)*(cov(s2,xdot^2) - 2*muhat*cov(s2,xdot))
  sig2hat <- ((n-1)*W + B)/n
  quantiles <- quantile (as.vector(x), probs=c(.025,.25,.5,.75,.975))
  if (W > 1.e-8) {   # non-degenerate case
    # Posterior interval post.range combines all uncertainties
    # in a t interval with center muhat, scale sqrt(postvar),
    # and postvar.df degrees of freedom.
    #
    #  postvar = sig2hat + B/(mn): variance for the posterior interval
    #            The B/(mn) term is there because of the
    #            sampling variance of muhat.
    #  varpostvar: estimated sampling variance of postvar
    postvar <- sig2hat + B/(m*n)
    varpostvar <-
      (((n-1)^2)*varW + (1+1/m)^2*varB + 2*(n-1)*(1+1/m)*covWB)/n^2
    post.df <- chisqdf (postvar, varpostvar)
    post.range <- muhat + sqrt(postvar) * qt(1-alpha/2, post.df)*c(-1,0,1)
    # Estimated potential scale reduction (that would be achieved by
    # continuing simulations forever) has two components: an estimate and
    # an approx. 97.5% upper bound.
    #
    # confshrink = sqrt(postvar/W),
    #     multiplied by sqrt(df/(df-2)) as an adjustment for the
    #     width of the t-interval with df degrees of freedom.
    #
    # postvar/W = (n-1)/n + (1+1/m)(1/n)(B/W); we approximate the sampling
    # dist. of (B/W) by an F distribution, with degrees of freedom estimated
    # from the approximate chi-squared sampling dists for B and W.  (The
    # F approximation assumes that the sampling dists of B and W are
    # independent; if they are positively correlated, the approximation is
    # conservative.)
    varlo.df <- chisqdf (W, varW)
    confshrink.range <- sqrt (c(postvar/W,
      (n-1)/n + (1+1/m)*(1/n)*(B/W) * qf(.975, m-1, varlo.df)) *
      post.df/(post.df-2))
    list(post=post.range, quantiles=quantiles, confshrink=confshrink.range)
  }
  else {   # degenerate case: all entries in "data matrix" are identical
    list (post=muhat*c(1,1,1), quantiles=quantiles, confshrink=c(1,1))
  }
}
##############################################################################
# gpar on the log scale, for all-positive scalar summaries.
# Intervals and quantiles are transformed back with exp(); the
# scale-reduction factor is reported on the log scale unchanged.
gpar.log <- function (r) {
  gp <- gpar(log(r))
  list (post = exp(gp$post), quantiles = exp(gp$quantiles),
        confshrink = gp$confshrink)
}
# gpar on the logit scale, for scalar summaries constrained to (0,1).
# Intervals and quantiles are transformed back with invlogit(); the
# scale-reduction factor is reported on the logit scale unchanged.
gpar.logit <- function (r) {
  gp <- gpar(logit(r))
  list (post = invlogit(gp$post), quantiles = invlogit(gp$quantiles),
        confshrink = gp$confshrink)
}
# Monitors the convergence of the Gibbs sampler for several summaries
# at once.  a[i,j,k], i=1,...,n (iterations), j=1,...,m (sequences),
# k=1,...,d (scalar summaries).  Returns a d x 2 matrix whose k-th row
# is gpar's confshrink (estimate, 97.5% bound) for summary k.
gpar.vec <- function (a) {
  d <- dim(a)[3]
  # Preallocate rather than growing with rbind() inside the loop.
  check <- matrix(0, nrow = d, ncol = 2)
  for (k in seq_len(d)) {
    check[k, ] <- gpar(a[, , k])$confshrink
  }
  check
}
"do.summary"<-
function(a, v = c(0, 0.01, 0.025, 0.05,
0.1, 0.25, 0.5, 0.75, 0.9,
0.95, 0.975, 0.99, 1), coef.names = paste(1:(dim(a)[3])))
{
x <- apply(a, 3, quantile, probs = v)
dimnames(x) <- list(paste(round(100 * v, 0), "%", sep = ""), coef.names
)
t(x)
}