My implementation of Berry and Berry’s hierarchical Bayes algorithm for adverse events
[This article was first published on Realizations in Biostatistics, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
I’ve been working on this for quite some time (see here for a little background), so I’m pleased that it looks close to done at least as far as the core algorithm. It uses global variables for now, and I’m sure there are a couple of other bugs lurking, but here it is, after the jump.Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
const.sqrt2pi <- sqrt(2*3.14159265358979) logit <- function(x) return(x/(1-x)) thetacond.log <- function(b,j,thet=theta[b,j]) { res <- y[b,j]*(thet+gamm[b,j])-nt*log(1+exp(thet+gamm[b,j]))+ log(pi[b]*(thet==0) +(1-pi[b])/ (const.sqrt2pi*sigma.theta[b])*exp(-0.5*(thet-mu.theta[b])^2/sigma.theta[b]^2)) return(res) } thetabj.update <- function() { # this is a mixture MH step, as described in Berry and Berry (2004) for (b in 1:nbodysys) { for (j in 1:nae[b]) { # simulate new candidate - point mass at 0 and normal mixture if (runif(1)<0.5) { thetabj.cand <- 0 } else { thetabj.cand <- rnorm(1,mean=theta[b,j],sd=sigma.mh.theta) } #cat("b=",b,", j=",j,sep="") ccond.ratio <- exp(thetacond.log(b,j,thet=thetabj.cand)-thetacond.log(b,j)) if (theta[b,j]==0 && thetabj.cand==0 && runif(1) theta[b,j]<<-thetabj.cand #cat("cond=1, acceptance probability=",ccond.ratio,'\n',sep="") } else if (thetabj.cand==0 && theta[b,j]!=0 && runif(1)< ccond.ratio*exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)/sigma.mh.theta/const.sqrt2pi) { theta[b,j]<<-thetabj.cand #cat("cond=2, acc prob=",ccond.ratio*exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)/sigma.mh.theta/const.sqrt2pi,'\n',sep="") } else if (thetabj.cand!=0 && theta[b,j]==0 && runif(1) { theta[b,j]<<-thetabj.cand #cat("cond=3, acc prob=",ccond.ratio/exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)*sigma.mh.theta*const.sqrt2pi,'\n',sep="") } else if (thetabj.cand!=0 && theta[b,j]!=0 && runif(1) { theta[b,j]<<-thetabj.cand #cat("cond=4, acceptance probability=",ccond.ratio,'\n',sep="") } #else cat('\n') } } } gammabj.cond.log <- function(b,j,g=gamm[b,j]) { tmp1 <- exp(theta[b,j]+g) tmp2 <- exp(g) return(y[b,j]*(theta[b,j]+g)-nt*log(1+tmp1)+x[b,j]*g-nc*log(1+tmp2) -0.5/sigma.gamma^2*(g-mu.gamma[b])^2) } gammabj.update <- function() { for (b in 1:nbodysys) { for (j in 1:nae[b]) { cand <- rnorm(1,mean=gamm[b,j],sd=sigma.mh.gamma) acc.prob <- exp(gammabj.cond.log(b,j,g=cand)-gammabj.cond.log(b,j)) #if (acc.prob>1) then acc.prob<-1 # not sure we need this for program if (runif(1)<<-cand } } } pib.update <- function() { for (b in 1:nbodysys) { pi[b] <<- rbeta(1,alpha.pi+sum(theta[b,1:nae[b]]==0,na.rm=T),beta.pi+nae[b]-sum(theta[b,1:nae[b]]==0,na.rm=T)) } } sigma.theta.update <- function() { for (b in 1:nbodysys) { sigma2.rand <- rgamma(1,nae[b]/2+alpha.theta, scale=1/beta.theta+0.5*sum((theta[b,1:nae[b]]-mu.theta[b])^2,na.rm=T)) # fixed sigma.theta[b] <<- 1/sqrt(sigma2.rand) } } sigma.gamma.update <- function() { tmp<-0 for (b in 1:nbodysys) { tmp <- tmp+sum((gamm[b,1:nae[b]]-mu.gamma[b])^2,na.rm=T) } sigma2.rand <- rgamma(1,0.5*sum(nae)+alpha.sigma.gamma,scale=1/beta.sigma.gamma+ 0.5*tmp) #fixed sigma.gamma <<- 1/sqrt(sigma.gamma) } tau.gamma0.update <- function() { tau2.gamma0 <- rgamma(1,0.5*nbodysys+alpha.tau.gamma,scale=1/beta.tau.gamma+0.5* sum((mu.gamma-mu.gamma0)^2)) #fixed tau.gamma0 <<- 1/sqrt(tau2.gamma0) } tau.theta0.update <- function() { tau2.theta0 <- rgamma(1,0.5*nbodysys+alpha.tau.theta,scale=1/beta.tau.theta+0.5* sum((mu.theta-mu.theta0)^2)) #fixed tau.theta0 <<- 1/sqrt(tau2.theta0) } mu.gamma.update <- function() { for (b in 1:nbodysys) { gibbs.mu <- (tau.gamma0^2*sum(gamm[b,1:nae[b]],na.rm=T)+sigma.gamma^2*mu.gamma0)/ (tau.gamma0^2*nae[b]+sigma.gamma^2) gibbs.sigma2 <- tau.gamma0^2*sigma.gamma^2/(tau.gamma0^2*nae[b]+sigma.gamma^2) mu.gamma[b] <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2)) } } mu.theta.update <- function() { for (b in 1:nbodysys) { gibbs.mu <- (tau.theta0^2*sum(theta[b,1:nae[b]],na.rm=T)+sigma.theta^2*mu.theta0)/ (tau.theta0^2*nae[b]+sigma.theta^2) gibbs.sigma2 <- tau.theta0^2*sigma.theta^2/(tau.theta0^2*nae[b]+sigma.theta^2) mu.theta[b] <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2)) } } mu.gamma0.update <- function() { gibbs.mu <- (tau.gamma00^2*sum(mu.gamma)+tau.gamma0^2*mu.gamma00) / (tau.gamma00^2*nbodysys+tau.gamma0^2) gibbs.sigma2 <- tau.gamma00^2*tau.gamma0^2/(tau.gamma00^2*nbodysys + tau.gamma0^2) mu.gamma0 <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2)) } mu.theta0.update <- function() { gibbs.mu <- (tau.theta00^2*sum(mu.theta)+tau.theta0^2*mu.theta00) / (tau.theta00^2*nbodysys+tau.theta0^2) gibbs.sigma2 <- tau.theta00^2*tau.theta0^2/(tau.theta00^2*nbodysys + tau.theta0^2) mu.theta0 <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2)) } alpha.pi.logcond <- function(alpha=alpha.pi) { if (alpha<=1) return(-Inf) else { return(nbodysys*(lgamma(alpha+beta.pi)-lgamma(alpha))+ (alpha+1)*sum(log(pi))-alpha*lambda.alpha) } } alpha.pi.update <- function() { cand <- rnorm(1,mean=alpha.pi,sd=sigma.mh.theta) acc.prob <- exp(alpha.pi.logcond(alpha=cand)-alpha.pi.logcond()) if (runif(1)<<-cand } beta.pi.logcond <- function(beta=beta.pi) { if (beta<=1) return(-Inf) else { return(nbodysys*(lgamma(beta+alpha.pi)-lgamma(beta))+ (beta+1)*sum(log(1-pi))-beta*lambda.beta) } } beta.pi.update <- function() { cand <- rnorm(1,mean=beta.pi,sd=sigma.mh.theta) acc.prob <- exp(beta.pi.logcond(beta=cand)-beta.pi.logcond()) if (runif(1)<<-cand } # set up constants as in sec 4 mu.theta00<-0 tau.theta00 <- sqrt(10) mu.gamma00 <- 0 tau.gamma00 <- sqrt(10) alpha.theta <- 3 beta.theta <- 1 alpha.theta0 <- 3 beta.theta0 <- 1 alpha.tau.gamma<-3 beta.tau.gamma <- 1 alpha.tau.theta <- 3 beta.tau.theta <- 1 alpha.sigma.gamma<-3 beta.sigma.gamma <- 1 lambda.alpha<-1 lambda.beta <- 1 # parms for the example in berry and berry ae.dat <- read.table(file="ae.dat",header=T) ae.dat$bodysysf <- factor(ae.dat$bodysys) nt <- 148 nc <- 132 nae <- tapply(ae.dat$aenum,ae.dat$bodysysf,length) nbodysys <- length(levels(ae.dat$bodysysf)) y <- matrix(NA,nrow=nbodysys,ncol=max(nae)) x <- matrix(NA,nrow=nbodysys,ncol=max(nae)) for (i in 1:nbodysys) { y[i,1:nae[i]] <- ae.dat$trtct[ae.dat$bodysysf==as.numeric(levels(ae.dat$bodysysf)[i])] x[i,1:nae[i]] <- ae.dat$contct[ae.dat$bodysysf==as.numeric(levels(ae.dat$bodysysf)[i])] } sigma.mh.theta <- 2 sigma.mh.gamma <- 2 # set up containers theta <- matrix(NA,nrow=nbodysys,ncol=max(nae)) gamm <- matrix(NA,nrow=nbodysys,ncol=max(nae)) pi <- rep(NA,length=nbodysys) sigma.theta <- rep(NA,length=nbodysys) sigma.gamma <- NA tau.gamma0 <- NA tau.theta0 <- NA mu.gamma <- rep(NA,length=nbodysys) mu.theta <- rep(NA,length=nbodysys) mu.gamma0 <- NA mu.theta0 <- NA alpha.pi <- NA beta.pi <- NA # initial values gamm <- logit(x/nc) theta <- logit(y/nt)-gamm pi <- rep(.5,length=nbodysys) sigma.theta <- rep(1,length=nbodysys) sigma.gamma <- 1 tau.gamma0 <- 1 tau.theta0 <- 1 mu.gamma <- rep(0,length=nbodysys) mu.theta <- rep(0,length=nbodysys) mu.gamma0 <- 0 mu.theta0 <- 1 alpha.pi <- 1.5 beta.pi <- 1.5 # chains n.iter <- 11000 n.burnin <- 1000 theta.chain <- array(NA,dim=c(nbodysys,max(nae),n.iter)) gammabj.chain <- array(NA,dim=c(nbodysys,max(nae),n.iter)) for (i in 1:n.iter) { thetabj.update() theta.chain[,,i] <- theta gammabj.update() gammabj.chain[,,i] <- gamm pib.update() sigma.theta.update() sigma.gamma.update() tau.gamma0.update() tau.theta0.update() mu.gamma.update() mu.theta.update() mu.gamma0.update() mu.theta0.update() alpha.pi.update() beta.pi.update() } for (b in 1:nbodysys) { for (j in 1:nae[b]) { if (b==1 && j==1) { cat(sprintf("%5s%5s%10s%10s\n","b","j","P(th=0)","P(th>0)")) cat(sprintf("%5s%5s%10s%10s\n","---","---","------","------")) } cat(sprintf("%5s%5d%10.3f%10.3f\n",names(nae)[b],j,sum(theta.chain[b,j,(n.burnin+1):n.iter]==0)/sum(!is.na(theta.chain[b,j,(n.burnin+1):n.iter])),sum(theta.chain[b,j,(n.burnin+1):n.iter]>0)/sum(!is.na(theta.chain[b,j,(n.burnin+1):n.iter])))) } } #mean(theta.chain[1,1,(n.burnin+1):n.iter],na.rm=T) plot(theta.chain[1,1,(n.burnin+1):n.iter],type='b',pch=0) plot(theta.chain[2,4,(n.burnin+1):n.iter],type='b') plot(theta.chain[2,4,1:n.iter],type='b') sum(theta.chain[2,4,])/n.iter gammabj.chain[1,1,(n.burnin+1):n.iter] plot(theta.chain[6,2,(n.burnin+1):n.iter],type='b') hist(theta.chain[1,1,(n.burnin+1):n.iter]) #acceptance rate foo <- theta.chain[1,1,(n.burnin+1):n.iter] sum(foo[2:(n.iter-n.burnin)]!=foo[1:(n.iter-n.burnin-1)])/(n.iter-n.burnin+1)
# data for Berry and Berry AE model bodysys aenum ae trtct contct 1 1 astenia/fatigue 57 40 1 2 fever 34 26 1 3 infection/fungal 2 0 1 4 infection/viral 3 1 1 5 malaise 27 20 3 1 anorexia 7 2 3 2 cendisiasis/oral 2 0 3 3 constipation 2 0 3 4 diarrhea 24 10 3 5 castroenteritis 3 1 3 6 nausea 2 7 3 7 vomiting 19 19 5 1 lymphadenopathy 3 2 6 1 dehydration 0 2 8 1 crying 2 0 8 2 insomnia 2 2 8 3 irritability 75 43 9 1 bronchitis 4 1 9 2 congestion/nasal 4 2 9 3 congestion/respiratory 1 2 9 4 cough 13 8 9 5 infection/upper_respiratory 28 20 9 6 laryngotracheobronchitis 2 1 9 7 pharyngitis 13 8 9 8 rhinorrhea 15 14 9 9 sinusitis 3 1 9 10 tonsillitis 2 1 9 11 wheezing 3 1 10 1 bite/sting 4 0 10 2 eczema 2 0 10 3 pruritis 2 1 10 4 rash 13 3 10 5 rash/diaper 6 2 10 6 rash/measles/rubella-like 8 1 10 7 rash/varicella-like 4 2 10 8 urticaria 0 2 10 9 viral/exanthema 1 2 11 1 conjunctivitis 0 2 11 2 otitis/media 18 14 11 3 otorrhea 2 1
I make no claims to the correctness of this code, but I have gotten results that are close to the Berry and Berry article.
To leave a comment for the author, please follow the link and comment on their blog: Realizations in Biostatistics.
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.