Analytical and simulation-based power analyses for mixed-design ANOVAs

[This article was first published on R Psychologist » R, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

In this post I show some R-examples on how to perform power analyses for mixed-design ANOVAs. The first example is analytical — adapted from formulas used in G*Power (Faul et al., 2007), and the second example is a Monte Carlo simulation. The source code is embedded at the end of this post.

Both functions require a dataframe, containing the parameters that will be used in the power calculations. Here is an example using three groups and three time-points.

# design -------
# mus
CT <- c(34.12, 21, 17.44)
BA <- c(36.88, 16.82, 8.75) 
ADM <- c(35.61, 14.39, 7.78)

study <- data.frame("group" = gl(3,3, labels=c("CT", "BA", "ADM")))
study$time <- gl(3,1,9, labels=c("Intake", "8 weeks", "16 weeks"))

study$DV <- c(CT, BA, ADM) 
study$SD <- 10

ggplot(study, aes(time, DV, group=group, linetype=group, shape=group)) + 
    geom_line() + 
    geom_point()

Here is a plot of our hypothetical study design.
Study design for power analysis for mixed-design ANOVA
Now, we will use this design to perform a power analysis using anova.pwr.mixed and anova.pwr.mixed.sim.

# analytical ----------
anova.pwr.mixed(data = study, Formula = "DV ~ time*group",
 n=10, m=3, rho=0.5)


   Terms      power n.needed
b  group      0.197       NA
w1 time       1.000       NA
w2 time:group 0.617       NA

# monte carlo ------------
anova.pwr.mixed.sim(data=study, Formula="DV ~ time*group + Error(subjects)",
 FactorA="group", n=10, rho=0.5, sims=100)


       terms power
1  group      0.19
2 time        1.00
3 time:group  0.64

Comparison of analytical and monte carlo power analysis

Now let’s compare the two functions’ results on the time x group-interaction. Hopefully, the two methods will yield the same result.

# compare
samples <- seq(10,50,3) # n's to use
analytical <- matrix(ncol=2, nrow=length(samples))
colnames(analytical) <- c("power", "n")
for(i in samples) { 
  j <- which(samples == i)
  analytical[j,1] <- anova.pwr.mixed(data = study, Formula = "DV ~ time*group", n=i, m=3, rho=0.5)$power[3]
  analytical[j,2] <- i
}
  
MC <- matrix(ncol=2, nrow=length(samples))
colnames(MC) <- c("power", "n")
for(i in samples) { 
  j <- which(samples == i)
  MC[j,1] <- anova.pwr.mixed.sim(data=study, Formula="DV ~ time*group + Error(subjects)", FactorA="group", n=i, rho=0.5, sims=500)$power[3]
  MC[j,2] <- i
}

# plot
MC <- data.frame(MC)
MC$method <- "MC"
analytical <- data.frame(analytical)
analytical$method <- "analytical"
df <- rbind(analytical, MC)

ggplot(df, aes(n, power, group=method, color=method)) + geom_smooth(se=F) + geom_point()

Comparison of analytical versus monte carlo power analysis for mixed design anova
Luckily, the analytical results are consistent with the Monte Carlo results.

References

Faul, F., Erdfelder, E., Lang, A. G., & Buchner, A. (2007). G* Power 3: A flexible statistical power analysis program for the social, behavioral, and biomedical sciences. Behavior research methods, 39(2), 175-191.

Source code

# function ---------------------------------------------------------------
anova.pwr.mixed <- function(data, Formula, n, m, rho, n.needed=FALSE) {
require(stringr)
Formula <- formula(paste(Formula, "+Error(group)")) # aov formula
x <- summary(aov(Formula, data)) # aov model
terms <- c(rownames(x[[1]][[1]]), rownames(x[[2]][[1]])) # terms in model
terms <- str_pad(terms, max(nchar(terms)), "right") # fix typography
pwr <- function(n) {
k <- length(unique(study$group)) # n groups
sigma <- mean(data$SD) # sigma
data$n <- n # n per group
N <- sum(with(data, tapply(n,group,unique))) # total N
x <- summary(aov(Formula, data)) # aov model
var_b <- x[[1]][[1]][[2]] / (k*m) # SS between
var_w <- x[[2]][[1]][[2]] / (k*m) # SS within
f_between <- sqrt(var_b / sigma^2) # f between
f_within <- sqrt(var_w / sigma^2) # f within
# Degrees of freedom ------------------------------------------------------
df1_b <- x[[1]][[1]][[1]] # df model between
df1_w <- x[[2]][[1]][[1]] # df model within
df1 <- c(df1_b, df1_w)
df2_b <- N-1-sum(df1_b) # df error between
df2_w <- (N*m-1) - (N-1) - sum(df1_w) # df error within
# Power Between -----------------------------------------------------------------
lambda <- N * (m/(1+(m-1)*rho)) * f_between^2 # lambda between
F_critical <- qf(0.05,df1_b,df2_b, lower.tail=FALSE) # F critical between
power_b <- pf(F_critical, df1_b, df2_b, lambda, lower.tail=FALSE) # power between
# Power Within -----------------------------------------------------------------
lambda <- (N * m * f_within^2) / (1 - rho) # lambda within
F_critical <- qf(0.05,df1_w,df2_w, lower.tail=FALSE) # F critical within
power_w <- pf(F_critical, df1_w, df2_w, lambda, lower.tail=FALSE) # power within
c("b" = power_b, "w" = power_w)
}
# Get n for desired power -------------------------------------------------
power <- pwr(n) # get powr for 'n'
if(n.needed) {
n_needed <- rep(NA, length(power)) # pre-create object
# loop til desired power is found
for(i in 1:length(power)) {
n2 <- 5
while(pwr(n2)[i] < 0.8) n2 <- n2+1
n_needed[i] <- n2
}
} else n_needed <- NA
# output ------------------------------------------------------------------
out <- data.frame(terms, "power" = round(power, 3),
"n.needed" = n_needed) # n per group
names(out)[1] <- str_pad("Terms", max(nchar(terms)), "right")
out
}

require(mvtnorm)
# function --------------------------------------------------------------------
anova.pwr.mixed.sim <- function(data, Formula, FactorA, n, rho, sims) {
Formula <- formula(Formula) # convert to formula
m <- nlevels(data$time) # number of time points
k <- nlevels(data[[FactorA]]) # number of groups
if(length(n) == 1) n <- rep(n, k) # n for each level of 'group'
n2 <- rep(n, each=m) # n for each cell
n_index <- cumsum(n) # help-variable used when creating subjects factor
subjects <- NULL
# create subjects factor
for(i in 1:k) {
if(i==1) subjects <- c(subjects, (rep(1:n_index[i], m)))
else subjects <- c(subjects, rep((n_index[i-1]+1):n_index[i], m))
}
subjects <- factor(subjects)
# expand model in 'dataframe' to fit subjects
dataframe <- NULL
for(i in 1:nrow(study)) {
dataframe <- rbind(dataframe, data[rep(i, n2[i]),])
}
dataframe$subjects <- subjects
# create covariance matrix from rho and sigmas within cells
cov_matrix <- function(sigmas) {
B <- matrix(sigmas, ncol=length(sigmas), nrow=length(sigmas))
sigma <- t(B) * B * rho
diag(sigma) <- sigmas^2
sigma
}
mu_m <- matrix(data$DV, ncol=m, byrow=T) # used to generate data
sigma_m <- matrix(data$SD, ncol=m, byrow=T) # used to generate data
nterms <- 3 # number of possible effects
sig <- matrix(ncol=nterms, nrow=sims) # pre allocate sig matrix
# actual simulation
for (i in 1:sims) {
dataframe$DV <- unlist(lapply(1:k, function(i) rmvnorm(n[i], mu_m[i,], cov_matrix(sigma_m[i,])))) # generate data for all cells
result <- summary(aov(Formula, dataframe)) # perform ANOVA
tmp <- c(result[[1]][[1]][[5]], result[[2]][[1]][[5]]) # get p-values
tmp_matrix <- matrix(tmp[!is.na(tmp)], nrow=1) # remova NA
sig[i,] <- tmp_matrix # save p-values
}
terms_b <- head(rownames(result[[1]][[1]]), 1) # get names of between-subjects terms
terms_w <- head(rownames(result[[2]][[1]]), 2) # get names of within-subjects terms
out <- data.frame("terms" = c(terms_b, terms_w), "power" = colMeans(sig < 0.05)) # output
out # return output
}

To leave a comment for the author, please follow the link and comment on their blog: R Psychologist » R.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)