Analytical and simulation-based power analyses for mixed-design ANOVAs
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
In this post I show some R-examples on how to perform power analyses for mixed-design ANOVAs. The first example is analytical — adapted from formulas used in G*Power (Faul et al., 2007), and the second example is a Monte Carlo simulation. The source code is embedded at the end of this post.
Both functions require a dataframe, containing the parameters that will be used in the power calculations. Here is an example using three groups and three time-points.
# design ------- # mus CT <- c(34.12, 21, 17.44) BA <- c(36.88, 16.82, 8.75) ADM <- c(35.61, 14.39, 7.78) study <- data.frame("group" = gl(3,3, labels=c("CT", "BA", "ADM"))) study$time <- gl(3,1,9, labels=c("Intake", "8 weeks", "16 weeks")) study$DV <- c(CT, BA, ADM) study$SD <- 10 ggplot(study, aes(time, DV, group=group, linetype=group, shape=group)) + geom_line() + geom_point()
Here is a plot of our hypothetical study design.
Now, we will use this design to perform a power analysis using anova.pwr.mixed and anova.pwr.mixed.sim.
# analytical ---------- anova.pwr.mixed(data = study, Formula = "DV ~ time*group", n=10, m=3, rho=0.5) Terms power n.needed b group 0.197 NA w1 time 1.000 NA w2 time:group 0.617 NA # monte carlo ------------ anova.pwr.mixed.sim(data=study, Formula="DV ~ time*group + Error(subjects)", FactorA="group", n=10, rho=0.5, sims=100) terms power 1 group 0.19 2 time 1.00 3 time:group 0.64
Comparison of analytical and monte carlo power analysis
Now let’s compare the two functions’ results on the time x group-interaction. Hopefully, the two methods will yield the same result.
# compare samples <- seq(10,50,3) # n's to use analytical <- matrix(ncol=2, nrow=length(samples)) colnames(analytical) <- c("power", "n") for(i in samples) { j <- which(samples == i) analytical[j,1] <- anova.pwr.mixed(data = study, Formula = "DV ~ time*group", n=i, m=3, rho=0.5)$power[3] analytical[j,2] <- i } MC <- matrix(ncol=2, nrow=length(samples)) colnames(MC) <- c("power", "n") for(i in samples) { j <- which(samples == i) MC[j,1] <- anova.pwr.mixed.sim(data=study, Formula="DV ~ time*group + Error(subjects)", FactorA="group", n=i, rho=0.5, sims=500)$power[3] MC[j,2] <- i } # plot MC <- data.frame(MC) MC$method <- "MC" analytical <- data.frame(analytical) analytical$method <- "analytical" df <- rbind(analytical, MC) ggplot(df, aes(n, power, group=method, color=method)) + geom_smooth(se=F) + geom_point()
Luckily, the analytical results are consistent with the Monte Carlo results.
References
Faul, F., Erdfelder, E., Lang, A. G., & Buchner, A. (2007). G* Power 3: A flexible statistical power analysis program for the social, behavioral, and biomedical sciences. Behavior research methods, 39(2), 175-191.
Source code
# function --------------------------------------------------------------- | |
anova.pwr.mixed <- function(data, Formula, n, m, rho, n.needed=FALSE) { | |
require(stringr) | |
Formula <- formula(paste(Formula, "+Error(group)")) # aov formula | |
x <- summary(aov(Formula, data)) # aov model | |
terms <- c(rownames(x[[1]][[1]]), rownames(x[[2]][[1]])) # terms in model | |
terms <- str_pad(terms, max(nchar(terms)), "right") # fix typography | |
pwr <- function(n) { | |
k <- length(unique(study$group)) # n groups | |
sigma <- mean(data$SD) # sigma | |
data$n <- n # n per group | |
N <- sum(with(data, tapply(n,group,unique))) # total N | |
x <- summary(aov(Formula, data)) # aov model | |
var_b <- x[[1]][[1]][[2]] / (k*m) # SS between | |
var_w <- x[[2]][[1]][[2]] / (k*m) # SS within | |
f_between <- sqrt(var_b / sigma^2) # f between | |
f_within <- sqrt(var_w / sigma^2) # f within | |
# Degrees of freedom ------------------------------------------------------ | |
df1_b <- x[[1]][[1]][[1]] # df model between | |
df1_w <- x[[2]][[1]][[1]] # df model within | |
df1 <- c(df1_b, df1_w) | |
df2_b <- N-1-sum(df1_b) # df error between | |
df2_w <- (N*m-1) - (N-1) - sum(df1_w) # df error within | |
# Power Between ----------------------------------------------------------------- | |
lambda <- N * (m/(1+(m-1)*rho)) * f_between^2 # lambda between | |
F_critical <- qf(0.05,df1_b,df2_b, lower.tail=FALSE) # F critical between | |
power_b <- pf(F_critical, df1_b, df2_b, lambda, lower.tail=FALSE) # power between | |
# Power Within ----------------------------------------------------------------- | |
lambda <- (N * m * f_within^2) / (1 - rho) # lambda within | |
F_critical <- qf(0.05,df1_w,df2_w, lower.tail=FALSE) # F critical within | |
power_w <- pf(F_critical, df1_w, df2_w, lambda, lower.tail=FALSE) # power within | |
c("b" = power_b, "w" = power_w) | |
} | |
# Get n for desired power ------------------------------------------------- | |
power <- pwr(n) # get powr for 'n' | |
if(n.needed) { | |
n_needed <- rep(NA, length(power)) # pre-create object | |
# loop til desired power is found | |
for(i in 1:length(power)) { | |
n2 <- 5 | |
while(pwr(n2)[i] < 0.8) n2 <- n2+1 | |
n_needed[i] <- n2 | |
} | |
} else n_needed <- NA | |
# output ------------------------------------------------------------------ | |
out <- data.frame(terms, "power" = round(power, 3), | |
"n.needed" = n_needed) # n per group | |
names(out)[1] <- str_pad("Terms", max(nchar(terms)), "right") | |
out | |
} |
require(mvtnorm) | |
# function -------------------------------------------------------------------- | |
anova.pwr.mixed.sim <- function(data, Formula, FactorA, n, rho, sims) { | |
Formula <- formula(Formula) # convert to formula | |
m <- nlevels(data$time) # number of time points | |
k <- nlevels(data[[FactorA]]) # number of groups | |
if(length(n) == 1) n <- rep(n, k) # n for each level of 'group' | |
n2 <- rep(n, each=m) # n for each cell | |
n_index <- cumsum(n) # help-variable used when creating subjects factor | |
subjects <- NULL | |
# create subjects factor | |
for(i in 1:k) { | |
if(i==1) subjects <- c(subjects, (rep(1:n_index[i], m))) | |
else subjects <- c(subjects, rep((n_index[i-1]+1):n_index[i], m)) | |
} | |
subjects <- factor(subjects) | |
# expand model in 'dataframe' to fit subjects | |
dataframe <- NULL | |
for(i in 1:nrow(study)) { | |
dataframe <- rbind(dataframe, data[rep(i, n2[i]),]) | |
} | |
dataframe$subjects <- subjects | |
# create covariance matrix from rho and sigmas within cells | |
cov_matrix <- function(sigmas) { | |
B <- matrix(sigmas, ncol=length(sigmas), nrow=length(sigmas)) | |
sigma <- t(B) * B * rho | |
diag(sigma) <- sigmas^2 | |
sigma | |
} | |
mu_m <- matrix(data$DV, ncol=m, byrow=T) # used to generate data | |
sigma_m <- matrix(data$SD, ncol=m, byrow=T) # used to generate data | |
nterms <- 3 # number of possible effects | |
sig <- matrix(ncol=nterms, nrow=sims) # pre allocate sig matrix | |
# actual simulation | |
for (i in 1:sims) { | |
dataframe$DV <- unlist(lapply(1:k, function(i) rmvnorm(n[i], mu_m[i,], cov_matrix(sigma_m[i,])))) # generate data for all cells | |
result <- summary(aov(Formula, dataframe)) # perform ANOVA | |
tmp <- c(result[[1]][[1]][[5]], result[[2]][[1]][[5]]) # get p-values | |
tmp_matrix <- matrix(tmp[!is.na(tmp)], nrow=1) # remova NA | |
sig[i,] <- tmp_matrix # save p-values | |
} | |
terms_b <- head(rownames(result[[1]][[1]]), 1) # get names of between-subjects terms | |
terms_w <- head(rownames(result[[2]][[1]]), 2) # get names of within-subjects terms | |
out <- data.frame("terms" = c(terms_b, terms_w), "power" = colMeans(sig < 0.05)) # output | |
out # return output | |
} | |
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.