[This article was first published on Analysis of AFL, and kindly contributed to R-bloggers]. (You can report issues about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
library(fitzRoy)
library(tidyverse)
## -- Attaching packages --------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.5
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.8-23. For overview type 'help("mgcv-package")'.
# Download the two inputs: historical match results and the Squiggle tips feed.
afltables <- fitzRoy::get_match_results()
tips <- get_squiggle_data("tips")
## Getting data from https://api.squiggle.com.au/?q=tips

# Rename two home-team labels. mutate() evaluates its arguments in order, so
# the second replacement sees the result of the first.
# NOTE(review): presumably this aligns the names with those used in `tips`,
# which the later join on home team relies on -- confirm against the tips data.
afltables <- afltables %>%
  mutate(
    Home.Team = str_replace(Home.Team, "GWS", "Greater Western Sydney"),
    Home.Team = str_replace(Home.Team, "Footscray", "Western Bulldogs")
  )

# Inspect the resulting set of home-team names.
unique(afltables[["Home.Team"]])
## [1] "Fitzroy" "Collingwood"
## [3] "Geelong" "Sydney"
## [5] "Essendon" "St Kilda"
## [7] "Melbourne" "Carlton"
## [9] "Richmond" "University"
## [11] "Hawthorn" "North Melbourne"
## [13] "Western Bulldogs" "West Coast"
## [15] "Brisbane Lions" "Adelaide"
## [17] "Fremantle" "Port Adelaide"
## [19] "Gold Coast" "Greater Western Sydney"

# Column names available in each data set.
names(afltables)
## [1] "Game" "Date" "Round" "Home.Team"
## [5] "Home.Goals" "Home.Behinds" "Home.Points" "Away.Team"
## [9] "Away.Goals" "Away.Behinds" "Away.Points" "Venue"
## [13] "Margin" "Season" "Round.Type" "Round.Number"
names(tips)
## [1] "venue" "hteamid" "tip" "correct" "date"
## [6] "round" "ateam" "bits" "year" "confidence"
## [11] "updated" "tipteamid" "gameid" "ateamid" "err"
## [16] "sourceid" "margin" "source" "hconfidence" "hteam"
# Reduce the tip timestamp to a calendar date and parse the match date, so the
# two tables can be joined on (home team, date).
tips[["date"]] <- as.Date(ymd_hms(tips[["date"]]))
afltables[["Date"]] <- ymd(afltables[["Date"]])

# Keep every tip and attach the matching match result where one exists.
joined_dataset <- tips %>%
  left_join(afltables, by = c("hteam" = "Home.Team", "date" = "Date"))

# Build the modelling frame for the "PlusSixOne" source only.
df <- joined_dataset %>%
  select(
    hteam, ateam, tip, correct, hconfidence, round, date,
    source, margin, Home.Points, Away.Points, year
  ) %>%
  mutate(
    # Tipped margin expressed from the home side: negated when the tipped
    # team is not the home team.
    squigglehomemargin = if_else(hteam == tip, margin, -margin),
    actualhomemargin = Home.Points - Away.Points,
    # Rescale by 1/100 (percentage -> proportion).
    hconfidence = hconfidence / 100
  ) %>%
  filter(source == "PlusSixOne") %>%
  select(
    round, hteam, ateam, hconfidence,
    squigglehomemargin, actualhomemargin, correct
  )

# Drop rows with any missing value (e.g. tips with no joined result).
keep <- complete.cases(df)
df <- df[keep, ]

# Team labels as factors.
team_cols <- c("hteam", "ateam")
df[team_cols] <- lapply(df[team_cols], as.factor)
# Calibration check: model the probability of a home win as a smooth function
# of the tipped home-win probability, working on the logit scale.
#
# hconfidence was rescaled to a 0-1 proportion when df was built, so the logit
# is log(p / (1 - p)), i.e. qlogis(p). Probabilities are nudged away from
# exactly 0 and 1 so the transform stays finite.
# (An earlier fit on the raw hconfidence scale was overwritten immediately by
# this one and has been dropped.)
eps <- 1e-6
p_home <- pmin(pmax(df$hconfidence, eps), 1 - eps)
df$logitChance <- qlogis(p_home)

ft <- gam(
  I(actualhomemargin > 0) ~ s(logitChance),
  data = df,
  family = "binomial"
)
preds <- predict(ft, type = "response", se.fit = TRUE)

# Order by the x variable (tipped probability) so the curve and its bands are
# traced left to right even if the fitted smooth is not monotone.
ord <- order(df$hconfidence)
conf_sorted <- df$hconfidence[ord]
fit_sorted <- as.numeric(preds$fit)[ord]
se_sorted <- as.numeric(preds$se.fit)[ord]

plot(
  conf_sorted, fit_sorted,
  col = "red", type = "l",
  xlab = "Tipped home win probability",
  ylab = "Fitted home win probability"
)
# Reference lines: 50% on each axis (both axes are on the 0-1 scale) and the
# line y = x.
abline(h = 0.5, col = "blue")
abline(v = 0.5, col = "blue")
abline(a = 0, b = 1, col = "purple")
# Approximate band: fitted value +/- 2 standard errors on the response scale.
lines(conf_sorted, fit_sorted + 2 * se_sorted)
lines(conf_sorted, fit_sorted - 2 * se_sorted)
# predicting winners
# ft=gam(I(actualhomemargin>0)~s(hconfidence),data=df,family="binomial",sp=0.05)
#   (the 0.05 was to make it a bit wiggly but not too silly; the default was
#   not monotonically increasing, which is silly)
# plot(ft,rug=FALSE,trans=binomial()$linkinv)
# abline(h=0.5,col="blue")
# abline(v=0.5,col="blue")
# abline(c(0,1),col="purple")
#
# predicting margins
# ft=gam(actualhomemargin~s(hconfidence),data=df)
# plot(ft,rug=FALSE,residual=TRUE,pch=1,cex=0.4)
# abline(h=0.5,col="blue")
# abline(v=0.5,col="blue")
# add squiggle margins to the plot:
# confSort = sort(df$hconfidence,index.return=TRUE)
# lines(confSort$x,df$squigglehomemargin[confSort$ix],col="purple")
To leave a comment for the author, please follow the link and comment on their blog: Analysis of AFL.
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
