Enhanced meboot package, simulating regression standard errors
Complete simulation using R available at: http://ssrn.com/abstract= 2295723
Note that one sets “sym=TRUE” in using the meboot function to symmetrize the maximum entropy (ME) density. Most students of quantitative sciences are familiar with the Durbin-Watson tests, needed because traditional standard errors (SEs) of regression coefficients are unreliable in the presence of autoregressive, AR(1), regression errors. However they may not know how unreliable!
Recall that ME bootstrap is a computer intensive tool for time series statistical inference problems where traditional confidence intervals can be unreliable. Vinod (textbook ch.9http://www.worldscibooks.com/
A simulation can be devised where the true SEs are known (from known error AR(1) parameters). It shows that confidence intervals for SEs using the traditional Chi-square density often has nearzero coverage of the true SE. The simulation also allows us to compare meboot with the moving block bootstrap (MBB), designed for m-dependent series. A short paper Maximum Entropy Bootstrap Simulations for Variance Estimation (July 18, 2013) available at: http://ssrn.com/abstract=
Each simulation takes about 13 hours of number crunching on my PC. The following illustrative code is for an abridged version. The results on so few observations and simulations are not expected to be anywhere close to the reliable bootstrap simulations. The reader might want to copy my code which produces an output table similar to the tables in http://ssrn.com/abstract=
#Lahiri2Sym
#jasa page 240 march 2012 Lahiri
#integer block length b, hk sequence=a'Qkna quad from
#set.seed(235)
#author H. D. Vinod, Fordham Univ. NY
rm(list=ls())
options(prompt = " ", continue = " ", width = 68,
useFancyQuotes = FALSE)
require(meboot)
###function theta to get one parameter SE of interest here
theta=function(yi,ui,coef.no=2){
reg1=lm(yi~ui)
su=summary(reg1) #second col. contains standard errors of reg. coeff
return(su$coef[coef.no,2])
} #returns standard error where coef.no=2 is slope, but can be changed
###function end
###function begin CI for std deviation Chi-sq based
sd.interval = function(data, conf.level = 0.95, df) {
chilower = qchisq((1 - conf.level)/2, df)
chiupper = qchisq((1 - conf.level)/2, df, lower.tail = FALSE)
v = var(data)
low= sqrt(df * v/chiupper)
upper= sqrt(df * v/chilower)
list(low=low, upper=upper)
}
###function sd.interval ends
###bootstrap matrix create function needs "coef.no" argument for theta
bstar.meboot <- function(yt, theta, ui,
level = 0.95, bigJ = iter, coef.no=2) {
semy <- meboot(x = yt, reps = bigJ,
expand.sd=FALSE, sym=TRUE)$ensemble
n <- NROW(yt)
m <- length(theta(yi=yt,ui=ui,coef.no=coef.no))
if(m!=1) stop("too many parameters in theta")
bb <- matrix(NA, bigJ)
for(j in 1:bigJ) {
yy <- semy[,j] #select j-th column yt
bb[j] <- theta(yi=yy, ui=ui, coef.no=coef.no)
}
return(bb)
}
### end function bstar.meboot which finds J=1000? versions
### of the standard errors (the parameter of interest here)
#begin simulation code for moving block boot
require(boot)
###function bstar.MBB is similar to bstar.meboot
bstar.MBB <- function(yt, theta, ui,
level = 0.95, bigJ = iter, blocklen=5, coef.no=coef.no) {
#sim="fixed" gives Moving block bootstrap in the call below
out=tsboot(yt,statistic=theta, R=bigJ, sim="fixed", l=blocklen,
ui=ui, coef.no=coef.no)
bs=out$t
list(out=out,bs=bs)
} ##end of bstar.MBB function
###function covg.prob to determine coverage prob. and CI width
covg.prob=function(xblow, xbup,true.se){
#author H. D. Vinod, Fordham Univ. NY
#function to compute coverage prob. and width
covg=0 #initialize
wid=xbup-xblow #CI widths
if (wid<0)
print(c("Error lower limit xblow> upper", xblow, xbup))
if (true.se>= xblow & true.se <=xbup) covg=1
list(wid=wid, covg=covg)
}
###function checks how well true value is covered by CI
###function fixed part of Lahiri specification
fixed.part=function(bigT,smalli=3){
zi=rnorm(bigT)
ui=(2+zi)*sqrt(smalli)
return(ui)
}
###function end
###function dgp sets up the DGP for each simulation
dgp=function(ui, bet=c(1,-1), ei){
bigT=length(ui)
if (length(ei) != bigT) stop("dgp ui, ei wrong lengths")
yi=bet[1]+bet[2]*ui+ei
return(yi)
}
### dgp function ends
#author H. D. Vinod, Fordham Univ. NY
# the following code not only runs simulation but
#collects the results in a nice table ready for publication
#only after all above functions compile, code to simulate
date()
bet=c(1,-1) #true beta values chosen for simulation
print("Lahiri2Sym.R is the name of the program")
print(c("true regression intercept=1, slope=-1"))
blocklen=5 #block length for MBB
print(c("block length of MBB=", blocklen))
expan=1 #this is a relic of old meboot versions
#by setting it at 1 it ignores the stuff
print(c("expansion number used for meboot=", expan))
iter =20 #change this to a larger value #########################
#iter <- 1000 #notation is J in the text
nsim=20 #change this to a larger value #########################
#nsim <- 1000#500#20#500 #notation is N in the text
#Tseq=c(50,75,100,150)
Tseq=20 #only one T is computed in test run
#Tseq=c(100, 300) #change this #########################
len.Tseq=length(Tseq)
ar1thet=c(0.3,0.6,0.9) #define models 1 to 3
#b=c(3, 5, 10, 20, 30, 50)#block sizes in Lahiri
#ar1thet=c(0.3) #define models 1 to 3
set.seed(132)
#### begin coef.no LOOP
for (coef.no in 1:1){
if (coef.no==1){
print(c(" focus is on Intercept coefficient number ", coef.no))}
if (coef.no==2){
print(c(" focus is on slope coefficient number ", coef.no))}
####begin ar1theta Loop
for (ithet in 1:length(ar1thet)){
ar1theta=ar1thet[ithet]
print(c("theta for AR(1)errors=",ar1theta))
out=matrix(NA,ncol=4, nrow=4*len.Tseq) #4 sets for each experiment
#### begin loop over bigT
for (jT in 1:len.Tseq){
bigT=Tseq[jT]
print(c("sample size=", bigT))
seei=1/(1-ar1theta^2)# infinite sum 1+ thet^2 + thet^4 etc
ui=fixed.part(bigT) #define the right hand regressor variable
#since regressors are randomly created (fixed)
#set.seed is needed above this call
ane=rep(1,bigT)#column of ones
bigx=cbind(ane,ui)#matrix of regressors
xtxinv=solve(t(bigx)%*% bigx)
#print(xtxinv)
true.se0=sqrt(xtxinv[1,1]*seei)
true.se1=sqrt(xtxinv[2,2]*seei)
if (jT==1){
print(c("true SE intercept/ slope",true.se0,true.se1))
} #end if
true.SE= sqrt(xtxinv[coef.no,coef.no]*seei)
#need to initialize counters for each model
counter.iid <- counter.MBB <- counter.meboot <- 0
wid.iid <- wid.meboot <- wid.MBB <- rep(NA, nsim)
var.iid <- var.meboot <- var.MBB <- rep(NA, nsim)
eyT.iid <- eyT.meboot <- eyT.MBB <- rep(NA, nsim)
outw=matrix(NA,ncol=3, nrow=nsim)
outv=matrix(NA,ncol=3, nrow=nsim)
oute=matrix(NA,ncol=3, nrow=nsim)
### begin i loop with new regression errors for each i
for (i in seq(1, nsim)){
ei <- arima.sim(list(order = c(1,0,0), ar = ar1theta), n = bigT)
#Note that var(ei) is chi-sq r.v. with T-2 df
#95 percent CI are (n-1) s^2/ chisq(alp/2, n-1)
#### call sd.interval = function(data, conf.level = 0.95, df)
se.b0=sqrt(xtxinv[1,1]*var(ei))
se.b1=sqrt(xtxinv[2,2]*var(ei))
if (i==1) print(c("seb0,seb1=",se.b0,se.b1))
sd.iid=sd.interval(data=ei, df=(bigT-2))
obs.SE= sqrt(xtxinv[coef.no,coef.no]*var(ei))
#### call seq covg.prob=function(xblow, xbup,true.se) #call details
xblow=sd.iid$low
xbup=sd.iid$upper
if (xblow > xbup) print(c("Error, xblow=", xblow, xbup, "i=",i))
covg.iid=covg.prob(xblow=sd.iid$low, xbup=sd.iid$upper, true.se=true.SE)
wid.iid[i]=covg.iid$wid
eyT.iid[i]= obs.SE
var.iid[i]= (obs.SE-true.SE)^2
counter.iid=counter.iid+covg.iid$covg
#### call dgp=function(ui, bet=c(1,-1), ei) ## the call details`
yt=dgp(ui=ui, ei=ei, bet=bet)
bsMBB=bstar.MBB(yt,theta, bigT, bigJ=iter,
blocklen=blocklen, coef.no=coef.no, ui=ui)$bs
qnMBB <- quantile(bsMBB, c(0.025, 0.975), type = 8)
covg.MBB=covg.prob(xblow=qnMBB[1], xbup=qnMBB[2], true.se=true.SE)
wid.MBB[i]=covg.MBB$wid
eyT.MBB[i]=mean(bsMBB)
var.MBB[i]=var(bsMBB)
counter.MBB=counter.MBB+covg.MBB$covg
bsme0= bstar.meboot(yt=yt,theta, bigT,
bigJ=iter, coef.no=coef.no, ui=ui)
bsme=bsme0 +expan*(bsme0-mean(bsme0))
#these are (2x-xbar) when expan=1. Following for 95% CI
qnmeboot <- quantile(bsme, c(0.025, 0.975), type = 8)
#qnmeboot <- quantile(bsme, c(0.00005, 0.99995), type = 8)
covg.meboot=covg.prob(xblow=qnmeboot[1], xbup=qnmeboot[2],
true.se=true.SE)
wid.meboot[i]=covg.meboot$wid #store width for i-th simulation
eyT.meboot[i]=mean(bsme)#store average of SEs of coeff
var.meboot[i]=var(bsme)
counter.meboot=counter.meboot+covg.meboot$covg
if(i<4){
print(c(counter.iid, counter.MBB,counter.meboot))
print(c(wid.iid[i], wid.MBB[i] ,wid.meboot[i]))
print(c(eyT.iid[i],eyT.MBB[i] ,eyT.meboot[i]))
print(c(var.iid[i],var.MBB[i] ,var.meboot[i]))
}
outw[i,1:3]=cbind(wid.iid[i], wid.MBB[i], wid.meboot[i])
oute[i,1:3]=cbind(eyT.iid[i], eyT.MBB[i], eyT.meboot[i])
outv[i,1:3]=cbind(var.iid[i], var.MBB[i], var.meboot[i])
}#end nsim loop over i
outp=cbind(counter.iid, counter.MBB, counter.meboot)/nsim
#apply(outc,MARGIN=2,mean)
print(c("coverage prob", outp))
outw.mean=apply(outw,MARGIN=2,mean)
oute.mean=apply(oute,MARGIN=2,mean)
outv.mean=apply(outv,MARGIN=2,mean)
print(date())
#inter-leaf the output, first row has probabilities and second has widths
out1=rbind(outp, outw.mean,oute.mean,outv.mean)
print(out1)
print(date())
out[(4*jT-3),1]=nsim
out[(4*jT-2),1]=bigT
out[(4*jT-1),1]=ar1theta
out[4*jT,1]=5
out[(4*jT-3),2:4]=outp
out[(4*jT-2),2:4]=outw.mean
out[(4*jT-1),2:4]=oute.mean
out[4*jT,2:4]=outv.mean
print(c("above results have T= ", bigT))
} #end of bigT loop
print(out)
require(xtable)
print(xtable(out, digits=4))
print(c("Above ar1theta=", ar1theta))
} #end of ar1theta loop
print(c("coefficient number above", coef.no))
} #end of coef.no loop
print("Symmetric ME density, sym=TRUE, expand.sd=FALSE")
print(date())