## S functions to derive posterior simulations for the parameters of a
## hierarchical normal linear regression

###############################################################################
#  									      #
# The function rerun does all the dirty work.  It sets up the problem as      #
# a non-hierarchical regression (possibly with many different variances)      #
# and calls various subroutines to get initial estimates of the variances     #
# and their EM estimates.  Then procedures are called to approximate the      #
# second derivative of the posterior density near the EM mode.  Using these   #
# estimates, a procedure is called to generate starting points for the Gibbs  #
# sampler, and then finally the Gibbs sampler is run.  Diagnostics for the    #
# Gibbs sampler are returned as well as a summary of the posterior.           #
#  									      #
# Function: rerun							      #
# Arguments: 								      #
#	x	n by k matrix of covariates				      #
#  									      #
#	y	n by 1 vector of data					      #
#  									      #
#       y.gr	n by 1 vector of integers indicating which observations have  #
#		variance sigma_1^2,...,sigma_I^2.			      #
#  									      #
#	b.gr 	k by 1 vector of integers indicating the grouping of fixed    #
#		and random effects.  If all the beta's are fixed can leave    #
#		as NULL, otherwise put 0 for fixed effects, 1's for first     #
#		group of betas,..., J's for last group of betas		      #
#  									      #
#	Hy	if NULL, the y's have variance determined by y.gr and 	      #
#		{sigma_i^2}.  If this is not true can specify a known Hy      #
#		(precision multiplier) as a matrix (see chol.Hy) or as a      #
#		diagonal vector (i.e. var(y_1)=s^2, var(y_2)=2_s^2)	      #
#  									      #
#	chol.Hy if TRUE, then if Hy is specified as a matrix, it is assumed   #
#		to be the cholesky of the precision.  Otherwise the algorithm #
#		will find the cholesky of Hy during runtime.		      #
#  									      #
#	Ha	if NULL, then Ha is assumed to be an l by l matrix of zeros.  #
#		Specify this if you want an informative prior on the hyper    #
#		parameters.						      #
#  									      #
#	chol.Ha see chol.Hy						      #
#  									      #
#	b0	the prior mean for the betas.  If NULL, taken to be zeros.    #
#  									      #
#	a0	the prior mean for the alphas.  If NULL, taken to be zeros.   #
#  									      #
#	Xb 	the matrix of parameter covariates.  This should be k by l.   #
#		If NULL, then there are no alphas in the model.		      #
#  									      #
#	init 	initial estimates for the variance components.  If NULL, the  #
#		program makes quick and dirty estimates which will            #
#		hopefully not slow down the EM too badly.  If you want to     #
#  		specify these, make sure to give I plus J of them.	      #
#  									      #
#	tol	a constant used in EM convergence check.  You might change    #
#		this if the EM takes forever to converge.		      #
#  									      #
#	maxreps the maximum number of iterations for the EM algorithm to do.  #
#  									      #
#	delta	the fraction of change to use for the numerical approx to the #
#		information matrix.  thi
#
#description is unfinished . . . . . . . . . . . . . . . . . . . . . . . . .
#
###############################################################################


rerun <- function(x, y, y.gr, b.gr = NULL, Hy = NULL, chol.Hy = F, 
	Ha = NULL, chol.Ha = F, b0 = NULL, a0 = NULL, 
	Xb = NULL, Ha = NULL, init = NULL, tol = 1e-07, maxreps = 30,
	delta = 0.1, eta =4, g.reps = 1000, num.trails = 10, verbose = 1){

	# do checks
	# ! NOT Implemented yet!

	n_length(y)
	k_dim(x)[2]
	gr <- codes(ordered(y.gr))
	if (!is.null(Hy)) {
		if (!chol.Hy) {Hy <- chol(Hy)}
		y <- Hy %*% y
		x <- Hy %*% x
		}
	I <- max(gr)
	ni <- as.vector(table(gr))
	pr <- rep(2, I)
	if (!is.null(b.gr)) {
		if (any(b.gr==0)) {b.gr <- codes(ordered(b.gr)) - 1}
		else {b.gr <- codes(ordered(b.gr))}
		J <- max(b.gr)
		if (is.null(b0)) {b0 <- rep(0, k)}
		y <- c(y, b0[b.gr > 0])
		x <- rbind(x, diag(k)[b.gr > 0, ])
		k.st <- sum(b.gr > 0)
		gr <- c(gr, b.gr[b.gr > 0] + I)
# changed	pr <- c(pr, rep(0, J))		# old
		pr <- c(pr, rep(1, J))		# new
		ni <- c(ni, as.vector(table(b.gr[b.gr > 0])))
		I <- I + J
		if (!is.null(Xb)){
			if (dim(Xb)[1] != k) {
				stop("# of rows of Xb must = # of cols of X")}
			l <- dim(Xb)[2]
# 2 mistakes? fixed below.  x <- cbind(x, rbind(matrix(0, n, k), Xb[(b.gr > 0), ]))
			x <- cbind(x, rbind(matrix(0, n, l), -Xb[(b.gr > 0), ]))
			if (is.null(Ha)) {l.st <- 0}
			else {a.gr <- (diag(Ha) > 0)
				l.st <- sum(sum(a.gr))}
			if (is.null(a0)) {a0 <- rep(0,l)}
			if (l.st > 0) {
				Ha <- Ha[a.gr > 0, a.gr > 0]
				if (!chol.Ha) {Ha <- chol(Ha)}
				a0 <- a0[a.gr > 0]
				a0 <- Ha %*% a0
				y <- c(y, a0)
				il <- diag(l)[a.gr > 0, ]
				il <- Ha %*% il
				x <- rbind(x, cbind(matrix(0, l.st, k), il))
				}
			}
		}
	if (is.null(init)) {init <- get.init(I, x, y, gr, pr)}
	v.mode <- em(x, y, init, gr, pr, ni, I, tol, maxreps, verbose)
	if(verbose>0) cat("Em found mode \n")
	if(verbose>1) print(v.mode)
	conv <- v.mode$converged
	v.mode <- v.mode$var.mode
	info.mode <- - d2pdv2(x, y, v.mode, gr, pr, I, delta)
	if(verbose>0) cat("cov estimated \n")
	if(verbose>1) print(solve(info.mode))
	v.start <- get.start(v.mode, info.mode, eta, num.trails)
	if(verbose>0) cat("starting points \n")
	if(verbose>1) print(v.start)
	post.sim <- run.gibbs(x, y, v.start, gr, pr, I, ni, k, g.reps,
		num.trails, verbose)
	return(var.mode = v.mode, converged = conv, cov.est = solve(info.mode),
		start.pts = v.start, post.dist = post.sim)
	}

get.init <- function(n, x, y, gr, pr){
	return((n:1)+5)}

get.start <- function(var.mode, info.mode, eta, numt){
	mat <- matrix(NA, numt, length(var.mode))
	sqrtsig <- t(solve(chol(info.mode)))
	i <- 1
	while (i < (numt+1)){
		vec <- rmvt(var.mode, sqrtsig, eta)
		if (all(vec > 0)) {
			mat[i, ] <- vec 
			i <- i + 1
			}
		}
	return(mat)}

em <- function(x, y, init, gr, pr, ni, I, tol, maxreps, verbose=1){
	conv.flag <- F
	post.curr <-  -1e99 	#Set this to avoid 1 step convergence
	vc <- init
	for (m in 1:maxreps){
		if (verbose>1) cat(m,"\n")
		qr.o <- do.qr(x, vc[gr], y, do.res = T)
		r.inv <- utmsolve(qr.o$R)
		s2.vec <- diag.xxt(x%*%r.inv)
		vl <- vc 
		for (i in 1:I){
			vc[i] <- vl[i] * sum(qr.o$res[gr==i]^2)
			vc[i] <- vc[i] + sum(s2.vec[gr==i])
			vc[i] <- vc[i] / (pr[i] + ni[i])
			}
		if (verbose>1) cat(vc,"\n")
		post.last <- post.curr
		post.curr <- calc.post(vl, gr, pr, qr.o)
		if (verbose>1) cat(post.curr,"\n")
		if (abs((post.curr-post.last)/post.curr) < tol){
			conv.flag <- T
			break
			}
		}
	return(list(var.mode = vc, converged = conv.flag))
	}


do.qr <- function(x, v, y = NULL, do.res = F, do.coef = F){
	x <- updat(x, v)
	y <- updat(y, v)
	qr.o <- qr(x)
	R.m <- utm(qr.o$qr[, qr.o$pivot])
	if (do.res) {resids <- qr.resid(qr.o,y)}
	else {resids <- null()}
	if (do.coef) {coeffs <- qr.coef(qr.o,y)}
	else {coeffs <- null()}
	return(list(R =  R.m, res = resids, coef = coeffs))
	}

d2pdv2 <- function(x, y, v, gr, pr, I, delta){
	qr.o <- NULL
	if (length(delta)==1) {delta <- rep(delta, I)}
	dmat <- matrix(0, I, I)
	for (i in 1:I){
		ui <- delta[i] * ((1:I)==i) * v[i]
		for (j in i:I){
			uj <- delta[j] * ((1:I)==j) * v[j]
			dmat[i,j] <- sum(
				+calc.post(v + ui + uj, gr, pr, qr.o, x, y),
				-calc.post(v - ui + uj, gr, pr, qr.o, x, y),
				-calc.post(v + ui - uj, gr, pr, qr.o, x, y),
				+calc.post(v - ui - uj, gr, pr, qr.o, x, y)
				)/(4*delta[i]*delta[j]*v[i]*v[j])
		}
	}
	dmat[row(dmat)>col(dmat)] <- t(dmat)[row(dmat)>col(dmat)]
	return(dmat)
}

run.gibbs <- function(x, y, v.start, gr, pr, I, ni, k, g.reps=100, num.tr=4,
  verbose=1){
#	post.sim <- array(NA, c(g.reps, num.tr, k + I))		# error?
	post.sim <- array(NA, c(g.reps, num.tr, k + l + I))	# fixed?
	v.last <- v.start
	for (m in 1:g.reps){
		if (verbose>0) cat("Iteration ",m,"\r")
		for (trail in 1:num.tr){
			qr.o <- do.qr(x, v.last[trail,][gr], y, do.res = T,
				do.coef = T)
			gamma.sim <- rmnorm(qr.o$coef, t(utmsolve(qr.o$R)))
			sim.res <- (y - x %*% gamma.sim)
			for(i in 1:I){
				v.last[trail, i] <- sum(sim.res[gr==i]^2)/
					rchisq(1, ni[i] - (2 - pr[i]))
				}
			post.sim[m, trail, ] <- c(gamma.sim, v.last[trail,])
			}
		}
	if (verbose>0) cat("\n")
	return(post.sim)}


calc.post <- function(v, gr, pr, qr.o = NULL, x = NULL, y = NULL){
	v.big <- v[gr]
	if (is.null(qr.o)) {
		qr.o <- do.qr(x, v.big, y, do.res = T)
		}
	det <- 1/(prod(diag(qr.o$R))^2)
	return(sum(c(.5*log(det), -.5*sum(log(c(v.big, rep(v, pr)))),
		-.5*sum(qr.o$res^2))))
	}

updat <- function(thing, var){
	return(thing/sqrt(var))
	}

rmnorm <- function(mu, sqrtsig){
	vec <- rnorm(length(mu))
	t(sqrtsig) %*% vec + mu
	}

rmvt <- function(mu, sqrtsig, dof){
	vec <- rmnorm(mu, sqrtsig)
	return(vec * sqrt(dof/rchisq(1,dof)))
	}

tr <- function(mat){
	sum(diag(mat))
	}	


utm <- function(mat){
	mat <- mat[seq(min(dim(mat))),]
	mat[row(mat) > col(mat)] <- 0
	return(mat)
	}


diag.xxt <-  function(x)
{
	n <- dim(x)[1]
	p <- dim(x)[2]
	resvec <- as.double(rep(0,n))
	if(!is.loaded(C.symbol("diagxxt_from_S")))
		dyn.load("re.o")
	storage.mode(x) <- "double"
	result <- .C("diagxxt_from_S",
		x,
		answer = resvec,
		as.integer(n),
		as.integer(p))
	return(result$answer)
}

utmsolve <- function(x)
{
	n <- dim(x)[1]
	if (n!=(dim(x)[2])) stop("x must be square")
	storage.mode(x) <- "double"
	xinv <- x 
	if(!is.loaded(C.symbol("uppersolve_from_S")))
		dyn.load("re.o")
	result <- .C("uppersolve_from_S",
		x,
		answer = xinv,
		as.integer(n),
		status = as.integer(1))
	if (!result$status) stop("matrix not invertible")
	return(result$answer)
}