[This article was first published on Analysis of AFL, and kindly contributed to R-bloggers]. (You can report an issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
# Setup ----
# Load packages, download the match results and tipster data, and bring the
# two sources onto common home-team names and date types.
# Lines starting with `##` are console output captured from the original run.

library(fitzRoy)
library(tidyverse)
## -- Attaching packages --------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.5
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts ------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
##     date
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
##     collapse
## This is mgcv 1.8-23. For overview type 'help("mgcv-package")'.

# Historical match results and the tips published through the Squiggle API.
afltables <- fitzRoy::get_match_results()
tips <- get_squiggle_data("tips")
## Getting data from https://api.squiggle.com.au/?q=tips

# Rewrite two home-team labels so they match the names listed below.
afltables <- afltables %>%
  mutate(
    Home.Team = Home.Team %>%
      str_replace("GWS", "Greater Western Sydney") %>%
      str_replace("Footscray", "Western Bulldogs")
  )

unique(afltables$Home.Team)
##  [1] "Fitzroy"                "Collingwood"
##  [3] "Geelong"                "Sydney"
##  [5] "Essendon"               "St Kilda"
##  [7] "Melbourne"              "Carlton"
##  [9] "Richmond"               "University"
## [11] "Hawthorn"               "North Melbourne"
## [13] "Western Bulldogs"       "West Coast"
## [15] "Brisbane Lions"         "Adelaide"
## [17] "Fremantle"              "Port Adelaide"
## [19] "Gold Coast"             "Greater Western Sydney"

names(afltables)
##  [1] "Game"         "Date"         "Round"        "Home.Team"
##  [5] "Home.Goals"   "Home.Behinds" "Home.Points"  "Away.Team"
##  [9] "Away.Goals"   "Away.Behinds" "Away.Points"  "Venue"
## [13] "Margin"       "Season"       "Round.Type"   "Round.Number"

names(tips)
##  [1] "venue"       "hteamid"     "tip"         "correct"     "date"
##  [6] "round"       "ateam"       "bits"        "year"        "confidence"
## [11] "updated"     "tipteamid"   "gameid"      "ateamid"     "err"
## [16] "sourceid"    "margin"      "source"      "hconfidence" "hteam"

# Parse the tip timestamps and keep only the calendar date.
tips$date <- as.Date(ymd_hms(tips$date))
# Join tips to results and check calibration on the logit scale ----

# Parse the match dates so they can be compared with the tip dates.
afltables$Date <- ymd(afltables$Date)

# Attach the match result to every tip, matching on home team and date.
joined_dataset <- left_join(
  tips, afltables,
  by = c("hteam" = "Home.Team", "date" = "Date")
)

# Build the modelling frame from the "PlusSixOne" tips only.
df <- joined_dataset %>%
  filter(source == "PlusSixOne") %>%
  mutate(
    # `margin` is flipped whenever the tipped team is not the home team, so the
    # result is always expressed from the home side.
    squigglehomemargin = if_else(hteam == tip, margin, -margin),
    actualhomemargin = Home.Points - Away.Points,
    # Rescale to 0-1 (presumably published as a percentage -- confirm).
    hconfidence = hconfidence / 100
  ) %>%
  select(
    round, hteam, ateam, hconfidence,
    squigglehomemargin, actualhomemargin, correct
  )

# Drop rows with any missing value (e.g. tips with no matching result).
df <- df[complete.cases(df), ]
df$hteam <- as.factor(df$hteam)
df$ateam <- as.factor(df$ateam)

# Log-odds of the tipped home-win probability: log(p / (1 - p)).
# hconfidence is already on the 0-1 scale at this point.
# NOTE(review): a confidence of exactly 0 or 1 gives -Inf/Inf here and gam()
# will not accept it; confirm the source never publishes those values.
df$logitChance <- qlogis(df$hconfidence)

# Does the home side win as often as the tipster's confidence implies?
ft <- gam(
  I(actualhomemargin > 0) ~ s(logitChance),
  data = df,
  family = "binomial"
)
preds <- predict(ft, type = "response", se.fit = TRUE)

# Order by the x variable so the curve is drawn left to right even if the
# fitted probabilities are not monotone in the confidence.
conf_order <- order(df$hconfidence)
conf_sorted <- df$hconfidence[conf_order]
fit_sorted <- as.numeric(preds$fit)[conf_order]
se_sorted <- as.numeric(preds$se.fit)[conf_order]

plot(
  conf_sorted, fit_sorted,
  col = "red", type = "l",
  xlab = "hconfidence", ylab = "fitted home-win probability"
)
# Reference lines: 50% on both axes, and y = x.
abline(h = 0.5, col = "blue")
abline(v = 0.5, col = "blue")
abline(a = 0, b = 1, col = "purple")
# Band of +/- 2 standard errors around the fitted curve.
lines(conf_sorted, fit_sorted + 2 * se_sorted)
lines(conf_sorted, fit_sorted - 2 * se_sorted)
# predicting winners
# Smooth of the home-win indicator on the tipster's home confidence.
# the 0.05 was to make it a bit wiggly but not too silly (the default was not
# monotonically increasing, which is silly)
ft <- gam(
  I(actualhomemargin > 0) ~ s(hconfidence),
  data = df,
  family = "binomial",
  sp = 0.05
)

# plot.gam() draws the smooth without the model intercept. Shifting by the
# intercept before applying the inverse link puts the curve on the probability
# scale, which is what the reference lines below assume.
plot(ft, rug = FALSE, shift = coef(ft)[1], trans = binomial()$linkinv)
abline(h = 0.5, col = "blue")
abline(v = 0.5, col = "blue")
abline(a = 0, b = 1, col = "purple")
# predicting margins
# Smooth of the actual home margin on the tipster's home confidence.
ft <- gam(actualhomemargin ~ s(hconfidence), data = df)

# plot.gam() draws the smooth without the model intercept; shift by the
# intercept so the curve and partial residuals are on the margin scale and can
# be compared with the tipped margins added below.
plot(
  ft,
  rug = FALSE, residuals = TRUE, shift = coef(ft)[1],
  pch = 1, cex = 0.4
)
# Reference lines: a margin of zero and a confidence of 0.5.
abline(h = 0, col = "blue")
abline(v = 0.5, col = "blue")

# add squiggle margins to the plot
confSort <- sort(df$hconfidence, index.return = TRUE)
lines(confSort$x, df$squigglehomemargin[confSort$ix], col = "purple")
To leave a comment for the author, please follow the link and comment on their blog: Analysis of AFL.
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.