[This article was first published on S+/R – Yet Another Blog in Statistical Computing, and kindly contributed to R-bloggers.] (You can report an issue about the content on this page here.)
Want to share your content on R-bloggers? Click here if you have a blog, or here if you don't.
# Packages used by the benchmark below; the hflights package supplies the
# `hflights` data set that every test modifies.
pkgs <- c(
  "hflights", "doParallel", "foreach", "dplyr", "rbenchmark", "data.table"
)
# library() stops with an error as soon as a package is missing, whereas
# require() only returns FALSE and lets the script fail later with a less
# obvious message. invisible() keeps the lapply() result off the console.
invisible(lapply(pkgs, library, character.only = TRUE))
data(hflights)
# Compare five ways of adding two derived columns to `hflights`:
#   wday  - "weekend" when DayOfWeek is 6 or 7, otherwise "weekday"
#   delay - ArrDelay + DepDelay
# Each test runs 10 times; rows of the result are ordered by user.self and the
# `relative` column is computed from user.self as well.
# NOTE(review): user.self is the CPU time of the current R process only; time
# spent in child processes is reported separately (user.child), so read the
# `elapsed` column too when judging the foreach test.

# Register the parallel backend once, outside the timed expressions, so the
# foreach test does not repeat (and time) backend set-up on every replication.
registerDoParallel(cores = 2)

benchmark(
  replications = 10, order = "user.self", relative = "user.self",
  transform = {
    # Base R transform(): returns a copy of the data frame with the new columns.
    transform(
      hflights,
      wday = ifelse(DayOfWeek %in% c(6, 7), "weekend", "weekday"),
      delay = ArrDelay + DepDelay
    )
  },
  within = {
    # Base R within(): evaluates the block with the data frame's columns in
    # scope and keeps the variables assigned inside it as new columns.
    within(hflights, {
      wday <- ifelse(DayOfWeek %in% c(6, 7), "weekend", "weekday")
      delay <- ArrDelay + DepDelay
    })
  },
  mutate = {
    # dplyr's dedicated verb for adding variables.
    mutate(
      hflights,
      wday = ifelse(DayOfWeek %in% c(6, 7), "weekend", "weekday"),
      delay = ArrDelay + DepDelay
    )
  },
  foreach = {
    # Split and combine: each new column is evaluated as its own %dopar% task
    # and the results are appended to hflights (the .init value) via mutate().
    v <- c(names(hflights), "wday", "delay")
    f <- expression(
      ifelse(hflights$DayOfWeek %in% c(6, 7), "weekend", "weekday"),
      hflights$ArrDelay + hflights$DepDelay
    )
    df <- foreach(fn = iter(f), .combine = mutate, .init = hflights) %dopar% {
      eval(fn)
    }
    # Set every column name explicitly: the original names followed by the
    # two new ones.
    names(df) <- v
  },
  data.table = {
    # data.table: `:=` adds both columns to the freshly created table. The
    # columns are referenced directly in j rather than through the global
    # hflights data frame.
    data.table(hflights)[, c("wday", "delay") := list(
      ifelse(DayOfWeek %in% c(6, 7), "weekend", "weekday"),
      ArrDelay + DepDelay
    )]
  }
)
# test replications elapsed relative user.self sys.self user.child
# 4 foreach 10 1.442 1.000 0.240 0.144 0.848
# 2 within 10 0.667 2.783 0.668 0.000 0.000
# 3 mutate 10 0.679 2.833 0.680 0.000 0.000
# 5 data.table 10 0.955 3.983 0.956 0.000 0.000
# 1 transform 10 1.732 7.200 1.728 0.000 0.000
To leave a comment for the author, please follow the link and comment on their blog: S+/R – Yet Another Blog in Statistical Computing.
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
