# read in the data from an excel-format ".csv" file hiv.data <- read.table ("allvar.csv", sep=",", header=T) attach.all (hiv.data) # use r to convert dates to "julian dates" (number of days since 1 jan 1960) vdate <- as.character (strptime (as.character (VDATE), "%m/%d/%Y")) v.year <- as.numeric (substr (vdate, 1, 4)) v.month <- as.numeric (substr (vdate, 6, 7)) v.day <- as.numeric (substr (vdate, 9, 10)) library (survival) v.date <- as.numeric (mdy.date (v.month, v.day, v.year)) n.persons <- max(newpid) initial.date <- rep (NA, n.persons) for (j in 1:n.persons){ initial.date[j] <- min (v.date[newpid==j], na.rm=T) } relative.date <- v.date - initial.date[newpid] # just consider the "control" patients (treatmnt==1) and with initial age between 1 and 5 years ok <- treatmnt==1 & !is.na(CD4PCT) & (baseage>1 & baseage<5) ok[is.na(ok)] <- FALSE y <- CD4PCT[ok] time <- relative.date[ok]/365.24 pid.subset <- newpid[ok] unique.pid <- unique (pid.subset) n <- length (y) J <- length (unique.pid) person <- rep (NA, n) for (j in 1:J){ person[pid.subset==unique.pid[j]] <- j } # analyze cd4 on the square root scale! (we'll discuss this) y <- sqrt(y) # ok, now the dataset is complete! fit the bugs model data <- list ("y", "person", "time", "n", "J") inits <- function (){ list (a=rnorm(J), b=runif(J), mu.a=rnorm(1), mu.b=rnorm(1), sigma.y=runif(1), sigma.a=runif(1), sigma.b=runif(1)) } params <- c ("a", "b", "mu.a", "mu.b", "sigma.y", "sigma.a", "sigma.b") sa.1 <- bugs (data, inits, params, "southafrica.1.txt", n.chains=3, n.iter=1000) # check to see if the slopes and intercepts are strongly correlated (if so, we maybe should center the x-variable before including it in the regression) # we'll plot 16 simulations of a,b attach.bugs (sa.1) par(mfrow=c(4,4)) for (i in 1:16)plot(a[i,],b[i,]) corrs <- rep (NA, n.sims) for (i in 1:n.sims){ corrs[i] <- cor (a[i,],b[i,]) } # create a fake dataset from 400 new kids n.reps <- 10 diff.summary <- array (NA, c(n.reps, 9)) for (rep in 1:n.reps){ J.new <- 400 T.new <- 7 n.new <- J.new*T.new treatment.new <- rep (c(1,2), rep (J.new/2, 2)) person.new <- rep (1:J.new, rep (T.new, J.new)) time.new <- rep (seq(0,1,length=T.new), J.new) a.new <- rnorm (J.new, sa.1$mean$mu.a, sa.1$mean$sigma.a) b.new <- c (rnorm (J.new/2, sa.1$mean$mu.b, sa.1$mean$sigma.b), rnorm (J.new/2, sa.1$mean$mu.b/2, sa.1$mean$sigma.b/2)) y.new <- rnorm (J.new*T.new, a.new[person.new] + b.new[person.new]*time.new, sa.1$mean$sigma.y) # fit the new bugs model data <- list (y=y.new, person=person.new, time=time.new, n=n.new, J=J.new, treatment=treatment.new) inits <- function (){ list (a=rnorm(J.new), b=runif(J.new), mu.a=rnorm(1), mu.b=rnorm(2), sigma.y=runif(1), sigma.a=runif(1), sigma.b=runif(2), xi.a=rnorm(1), xi.b=rnorm(2)) } params <- c ("a.adj", "b.adj", "mu.a.adj", "mu.b.adj", "sigma.y", "sigma.a.adj", "sigma.b.adj", "diff") sa.new <- bugs (data, inits, params, "southafrica.new.1.bug", n.chains=3, n.iter=2000) diff.summary[rep,] <- summary["diff",] } # check corr between y_1 and regression y.0.new <- y.new[time.new==0] slope.new <- rep (NA, J.new) for (j in 1:J.new){ slope.new[j] <- lm (y.new ~ time.new, subset=person.new==j)$coef[2] } cor (y.0.new, slope.new)