Science of The Super Bowl
[This article was first published on R – Jesse Piburn, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
A couple days ago, I participated in a Science of the Super Bowl Panel discussion organized by Newswise. I was asked to give a 5 (which turned more into about 10) minute overview, so I focused on answering 3 questions.
- What is data science?
- How is data science used in the NFL?
- How might data science affect the outcome of the Super Bowl?
For my talk (around 21:45 mark) I made some visuals, so I thought I would recreate one here and include the R code. Here is the finished product, below is the code to reproduce it. Here is a higher res copy, that is actually readable.
###################################################### # RB direction charts # 2/1/2017 # # ###################################################### library(nflscrapR) library(dplyr) library(ggplot2) library(ggthemes) library(gtable) library(grid) library(gridExtra) # download all paly by play data for 2016 including playoffs ----- # this is going to take a while reg_games <- extracting_gameids(2016, playoffs = FALSE) post_games <- extracting_gameids(2016, playoffs = TRUE) all_games <- c(reg_games, post_games) game_list <- lapply(all_games, game_play_by_play) s16 <- bind_rows(game_list) # calculate run percentages ----- rundf <- s16 %>% filter(PlayType %in% c("Run") & !is.na(down) & posteam %in% c("NE", "ATL")) %>% mutate(depth = ifelse(Yards.Gained <= 0, "Negative", "Short"), depth = ifelse(Yards.Gained >= 5, "Middle", depth), depth = ifelse(Yards.Gained >= 10, "Deep", depth)) %>% group_by(posteam, Rusher) %>% mutate(down_n = n()) %>% group_by(posteam, Rusher, RunLocation, RunGap, depth) %>% summarise(play_per = (n() / mean(down_n)) * 100, loc_att = n(), total_att = mean(down_n)) # just a little clean up ----- rundf[is.na(rundf$RunLocation), "RunLocation"] <- "middle" rundf[rundf$RunLocation == "middle", "RunGap"] <- "" rundf$runplace <- stringr::str_trim(paste(rundf$RunLocation, rundf$RunGap)) rundf$runplace <- factor(rundf$runplace, c("left end", "left tackle", "left guard", "middle", "right guard", "right tackle", "right end")) rundf$depth <- factor(rundf$depth, c("Negative", "Short", "Middle", "Deep")) # not all RBs will have runs to all locations, so make a full data frame ----- # and join to it before charting fulldf <- expand.grid("runplace" =c("left end", "left tackle", "left guard", "middle", "right guard", "right tackle", "right end"), "depth" = c("Negative", "Short", "Middle", "Deep")) # Devonta Freeman ----- freemandf <- rundf %>% filter(Rusher == "D.Freeman") %>% right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per)) freeman_plot <- ggplot(freemandf, aes(x = runplace, y = depth)) + geom_tile(aes(fill = play_per),colour = "white") + scale_fill_gradient(high = "#ca0020", low = "#0571b0", limits = c(0,15), guide_colourbar(title = "%")) + theme_fivethirtyeight() + labs(title = "Devonta Freeman") # Dion Lewis ----- lewisdf <- rundf %>% filter(Rusher == "D.Lewis") %>% right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per)) lewis_plot <- ggplot(lewisdf, aes(x = runplace, y = depth)) + geom_tile(aes(fill = play_per),colour = "white") + scale_fill_gradient(high = "#ca0020", low = "#0571b0", limits = c(0,30), guide_colourbar(title = "%")) + theme_fivethirtyeight() + labs(title = "Dion Lewis") # blount ----- blountdf <- rundf %>% filter(Rusher == "L.Blount") %>% right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per)) blount_plot<- ggplot(blountdf, aes(x = runplace, y = depth)) + geom_tile(aes(fill = play_per),colour = "white") + scale_fill_gradient(high = "#ca0020", low = "#0571b0", limits = c(0,15), guide_colourbar(title = "%")) + theme_fivethirtyeight() + labs(title = "LeGarrette Blount") # colemen ----- colemandf <- rundf %>% filter(Rusher == "T.Coleman") %>% right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per)) coleman_plot <- ggplot(colemandf, aes(x = runplace, y = depth)) + geom_tile(aes(fill = play_per),colour = "white") + scale_fill_gradient(high = "#ca0020", low = "#0571b0", limits = c(0,15), guide_colourbar(title = "%")) + theme_fivethirtyeight() + labs(title = "Tevin Coleman") # combine plots and add text ----- p1 <- arrangeGrob(freeman_plot, coleman_plot, blount_plot, lewis_plot) # sub title ---- titleback <- rectGrob(gp=gpar(fill = "#F0F0F0", col = NA)) titlesub <- textGrob("2016 Run Tendency for Super Bowl Running Backs", gp = gpar(fontsize = 12, fontface = "bold", fontfamily = "sans", col = "#3C3C3C"), just = "left", x = unit(0.01, "npc")) padding <- unit(5,"mm") p1 <- gtable_add_rows(p1, heights = grobHeight(titlesub) + padding, pos = 0) p1 <- gtable_add_grob(p1, grobTree(titleback, titlesub), 1, 1, 1, ncol(p1)) # title ---- title <- textGrob("Which Way Did he Go?", gp = gpar(fontsize = 18, fontface = "bold", fontfamily = "sans", col = "#3C3C3C"), just = "left", x = unit(0.01, "npc")) p1 <- gtable_add_rows(p1, heights = grobHeight(title) + padding, pos = 0) p1 <- gtable_add_grob(p1, grobTree(titleback, title), 1, 1, 1, ncol(p1)) # bottom ---- subtext <- textGrob("Source: @JesseOPiburn", gp = gpar(fontsize = 10, fontface = "bold.italic", fontfamily = "sans", col = "#3C3C3C"), just = "left", x = unit(0.01, "npc")) p1 <- gtable_add_rows(p1, heights = grobHeight(subtext) + padding, pos = -1) p1 <- gtable_add_grob(p1, grobTree(titleback,subtext), t = nrow(p1), l = 1, b = nrow(p1), r = 2) grid.draw(p1) ggsave(plot = p1, filename = "plots/rb direction.png", width = 18, height = 10, units = "in", dpi = 600)
To leave a comment for the author, please follow the link and comment on their blog: R – Jesse Piburn.
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.