Enhanced meboot package, simulating regression standard errors
Complete simulation using R available at: http://ssrn.com/abstract= 2295723
Note that one sets “sym=TRUE” in using the meboot function to symmetrize the maximum entropy (ME) density. Most students of quantitative sciences are familiar with the Durbin-Watson tests, needed because traditional standard errors (SEs) of regression coefficients are unreliable in the presence of autoregressive, AR(1), regression errors. However they may not know how unreliable!
Recall that ME bootstrap is a computer intensive tool for time series statistical inference problems where traditional confidence intervals can be unreliable. Vinod (textbook ch.9http://www.worldscibooks.com/
A simulation can be devised where the true SEs are known (from known error AR(1) parameters). It shows that confidence intervals for SEs using the traditional Chi-square density often has nearzero coverage of the true SE. The simulation also allows us to compare meboot with the moving block bootstrap (MBB), designed for m-dependent series. A short paper Maximum Entropy Bootstrap Simulations for Variance Estimation (July 18, 2013) available at: http://ssrn.com/abstract=
Each simulation takes about 13 hours of number crunching on my PC. The following illustrative code is for an abridged version. The results on so few observations and simulations are not expected to be anywhere close to the reliable bootstrap simulations. The reader might want to copy my code which produces an output table similar to the tables in http://ssrn.com/abstract=
#Lahiri2Sym #jasa page 240 march 2012 Lahiri #integer block length b, hk sequence=a'Qkna quad from #set.seed(235) #author H. D. Vinod, Fordham Univ. NY rm(list=ls()) options(prompt = " ", continue = " ", width = 68, useFancyQuotes = FALSE) require(meboot) ###function theta to get one parameter SE of interest here theta=function(yi,ui,coef.no=2){ reg1=lm(yi~ui) su=summary(reg1) #second col. contains standard errors of reg. coeff return(su$coef[coef.no,2]) } #returns standard error where coef.no=2 is slope, but can be changed ###function end ###function begin CI for std deviation Chi-sq based sd.interval = function(data, conf.level = 0.95, df) { chilower = qchisq((1 - conf.level)/2, df) chiupper = qchisq((1 - conf.level)/2, df, lower.tail = FALSE) v = var(data) low= sqrt(df * v/chiupper) upper= sqrt(df * v/chilower) list(low=low, upper=upper) } ###function sd.interval ends ###bootstrap matrix create function needs "coef.no" argument for theta bstar.meboot <- function(yt, theta, ui, level = 0.95, bigJ = iter, coef.no=2) { semy <- meboot(x = yt, reps = bigJ, expand.sd=FALSE, sym=TRUE)$ensemble n <- NROW(yt) m <- length(theta(yi=yt,ui=ui,coef.no=coef.no)) if(m!=1) stop("too many parameters in theta") bb <- matrix(NA, bigJ) for(j in 1:bigJ) { yy <- semy[,j] #select j-th column yt bb[j] <- theta(yi=yy, ui=ui, coef.no=coef.no) } return(bb) } ### end function bstar.meboot which finds J=1000? versions ### of the standard errors (the parameter of interest here) #begin simulation code for moving block boot require(boot) ###function bstar.MBB is similar to bstar.meboot bstar.MBB <- function(yt, theta, ui, level = 0.95, bigJ = iter, blocklen=5, coef.no=coef.no) { #sim="fixed" gives Moving block bootstrap in the call below out=tsboot(yt,statistic=theta, R=bigJ, sim="fixed", l=blocklen, ui=ui, coef.no=coef.no) bs=out$t list(out=out,bs=bs) } ##end of bstar.MBB function ###function covg.prob to determine coverage prob. and CI width covg.prob=function(xblow, xbup,true.se){ #author H. D. Vinod, Fordham Univ. NY #function to compute coverage prob. and width covg=0 #initialize wid=xbup-xblow #CI widths if (wid<0) print(c("Error lower limit xblow> upper", xblow, xbup)) if (true.se>= xblow & true.se <=xbup) covg=1 list(wid=wid, covg=covg) } ###function checks how well true value is covered by CI ###function fixed part of Lahiri specification fixed.part=function(bigT,smalli=3){ zi=rnorm(bigT) ui=(2+zi)*sqrt(smalli) return(ui) } ###function end ###function dgp sets up the DGP for each simulation dgp=function(ui, bet=c(1,-1), ei){ bigT=length(ui) if (length(ei) != bigT) stop("dgp ui, ei wrong lengths") yi=bet[1]+bet[2]*ui+ei return(yi) } ### dgp function ends #author H. D. Vinod, Fordham Univ. NY # the following code not only runs simulation but #collects the results in a nice table ready for publication #only after all above functions compile, code to simulate date() bet=c(1,-1) #true beta values chosen for simulation print("Lahiri2Sym.R is the name of the program") print(c("true regression intercept=1, slope=-1")) blocklen=5 #block length for MBB print(c("block length of MBB=", blocklen)) expan=1 #this is a relic of old meboot versions #by setting it at 1 it ignores the stuff print(c("expansion number used for meboot=", expan)) iter =20 #change this to a larger value ######################### #iter <- 1000 #notation is J in the text nsim=20 #change this to a larger value ######################### #nsim <- 1000#500#20#500 #notation is N in the text #Tseq=c(50,75,100,150) Tseq=20 #only one T is computed in test run #Tseq=c(100, 300) #change this ######################### len.Tseq=length(Tseq) ar1thet=c(0.3,0.6,0.9) #define models 1 to 3 #b=c(3, 5, 10, 20, 30, 50)#block sizes in Lahiri #ar1thet=c(0.3) #define models 1 to 3 set.seed(132) #### begin coef.no LOOP for (coef.no in 1:1){ if (coef.no==1){ print(c(" focus is on Intercept coefficient number ", coef.no))} if (coef.no==2){ print(c(" focus is on slope coefficient number ", coef.no))} ####begin ar1theta Loop for (ithet in 1:length(ar1thet)){ ar1theta=ar1thet[ithet] print(c("theta for AR(1)errors=",ar1theta)) out=matrix(NA,ncol=4, nrow=4*len.Tseq) #4 sets for each experiment #### begin loop over bigT for (jT in 1:len.Tseq){ bigT=Tseq[jT] print(c("sample size=", bigT)) seei=1/(1-ar1theta^2)# infinite sum 1+ thet^2 + thet^4 etc ui=fixed.part(bigT) #define the right hand regressor variable #since regressors are randomly created (fixed) #set.seed is needed above this call ane=rep(1,bigT)#column of ones bigx=cbind(ane,ui)#matrix of regressors xtxinv=solve(t(bigx)%*% bigx) #print(xtxinv) true.se0=sqrt(xtxinv[1,1]*seei) true.se1=sqrt(xtxinv[2,2]*seei) if (jT==1){ print(c("true SE intercept/ slope",true.se0,true.se1)) } #end if true.SE= sqrt(xtxinv[coef.no,coef.no]*seei) #need to initialize counters for each model counter.iid <- counter.MBB <- counter.meboot <- 0 wid.iid <- wid.meboot <- wid.MBB <- rep(NA, nsim) var.iid <- var.meboot <- var.MBB <- rep(NA, nsim) eyT.iid <- eyT.meboot <- eyT.MBB <- rep(NA, nsim) outw=matrix(NA,ncol=3, nrow=nsim) outv=matrix(NA,ncol=3, nrow=nsim) oute=matrix(NA,ncol=3, nrow=nsim) ### begin i loop with new regression errors for each i for (i in seq(1, nsim)){ ei <- arima.sim(list(order = c(1,0,0), ar = ar1theta), n = bigT) #Note that var(ei) is chi-sq r.v. with T-2 df #95 percent CI are (n-1) s^2/ chisq(alp/2, n-1) #### call sd.interval = function(data, conf.level = 0.95, df) se.b0=sqrt(xtxinv[1,1]*var(ei)) se.b1=sqrt(xtxinv[2,2]*var(ei)) if (i==1) print(c("seb0,seb1=",se.b0,se.b1)) sd.iid=sd.interval(data=ei, df=(bigT-2)) obs.SE= sqrt(xtxinv[coef.no,coef.no]*var(ei)) #### call seq covg.prob=function(xblow, xbup,true.se) #call details xblow=sd.iid$low xbup=sd.iid$upper if (xblow > xbup) print(c("Error, xblow=", xblow, xbup, "i=",i)) covg.iid=covg.prob(xblow=sd.iid$low, xbup=sd.iid$upper, true.se=true.SE) wid.iid[i]=covg.iid$wid eyT.iid[i]= obs.SE var.iid[i]= (obs.SE-true.SE)^2 counter.iid=counter.iid+covg.iid$covg #### call dgp=function(ui, bet=c(1,-1), ei) ## the call details` yt=dgp(ui=ui, ei=ei, bet=bet) bsMBB=bstar.MBB(yt,theta, bigT, bigJ=iter, blocklen=blocklen, coef.no=coef.no, ui=ui)$bs qnMBB <- quantile(bsMBB, c(0.025, 0.975), type = 8) covg.MBB=covg.prob(xblow=qnMBB[1], xbup=qnMBB[2], true.se=true.SE) wid.MBB[i]=covg.MBB$wid eyT.MBB[i]=mean(bsMBB) var.MBB[i]=var(bsMBB) counter.MBB=counter.MBB+covg.MBB$covg bsme0= bstar.meboot(yt=yt,theta, bigT, bigJ=iter, coef.no=coef.no, ui=ui) bsme=bsme0 +expan*(bsme0-mean(bsme0)) #these are (2x-xbar) when expan=1. Following for 95% CI qnmeboot <- quantile(bsme, c(0.025, 0.975), type = 8) #qnmeboot <- quantile(bsme, c(0.00005, 0.99995), type = 8) covg.meboot=covg.prob(xblow=qnmeboot[1], xbup=qnmeboot[2], true.se=true.SE) wid.meboot[i]=covg.meboot$wid #store width for i-th simulation eyT.meboot[i]=mean(bsme)#store average of SEs of coeff var.meboot[i]=var(bsme) counter.meboot=counter.meboot+covg.meboot$covg if(i<4){ print(c(counter.iid, counter.MBB,counter.meboot)) print(c(wid.iid[i], wid.MBB[i] ,wid.meboot[i])) print(c(eyT.iid[i],eyT.MBB[i] ,eyT.meboot[i])) print(c(var.iid[i],var.MBB[i] ,var.meboot[i])) } outw[i,1:3]=cbind(wid.iid[i], wid.MBB[i], wid.meboot[i]) oute[i,1:3]=cbind(eyT.iid[i], eyT.MBB[i], eyT.meboot[i]) outv[i,1:3]=cbind(var.iid[i], var.MBB[i], var.meboot[i]) }#end nsim loop over i outp=cbind(counter.iid, counter.MBB, counter.meboot)/nsim #apply(outc,MARGIN=2,mean) print(c("coverage prob", outp)) outw.mean=apply(outw,MARGIN=2,mean) oute.mean=apply(oute,MARGIN=2,mean) outv.mean=apply(outv,MARGIN=2,mean) print(date()) #inter-leaf the output, first row has probabilities and second has widths out1=rbind(outp, outw.mean,oute.mean,outv.mean) print(out1) print(date()) out[(4*jT-3),1]=nsim out[(4*jT-2),1]=bigT out[(4*jT-1),1]=ar1theta out[4*jT,1]=5 out[(4*jT-3),2:4]=outp out[(4*jT-2),2:4]=outw.mean out[(4*jT-1),2:4]=oute.mean out[4*jT,2:4]=outv.mean print(c("above results have T= ", bigT)) } #end of bigT loop print(out) require(xtable) print(xtable(out, digits=4)) print(c("Above ar1theta=", ar1theta)) } #end of ar1theta loop print(c("coefficient number above", coef.no)) } #end of coef.no loop print("Symmetric ME density, sym=TRUE, expand.sd=FALSE") print(date())