library(fitzRoy)
library(tidyverse)
## -- Attaching packages --------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.5
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.8-23. For overview type 'help("mgcv-package")'.
# Download the match results (via fitzRoy) and the Squiggle tips.
afltables <- fitzRoy::get_match_results()
tips <- get_squiggle_data("tips")
## Getting data from https://api.squiggle.com.au/?q=tips
# Relabel two home-team names in a single pass; presumably this lines them up
# with the `hteam` names used as a join key further down -- confirm against
# the Squiggle data.
afltables <- afltables %>%
  mutate(
    Home.Team = Home.Team %>%
      str_replace("GWS", "Greater Western Sydney") %>%
      str_replace("Footscray", "Western Bulldogs")
  )
# Sanity check: list the distinct home-team labels after the renaming above.
unique(afltables[["Home.Team"]])
## [1] "Fitzroy" "Collingwood"
## [3] "Geelong" "Sydney"
## [5] "Essendon" "St Kilda"
## [7] "Melbourne" "Carlton"
## [9] "Richmond" "University"
## [11] "Hawthorn" "North Melbourne"
## [13] "Western Bulldogs" "West Coast"
## [15] "Brisbane Lions" "Adelaide"
## [17] "Fremantle" "Port Adelaide"
## [19] "Gold Coast" "Greater Western Sydney"
# Column inventory of the match-results table.
colnames(afltables)
## [1] "Game" "Date" "Round" "Home.Team"
## [5] "Home.Goals" "Home.Behinds" "Home.Points" "Away.Team"
## [9] "Away.Goals" "Away.Behinds" "Away.Points" "Venue"
## [13] "Margin" "Season" "Round.Type" "Round.Number"
# Column inventory of the tips table.
colnames(tips)
## [1] "venue" "hteam" "tip" "correct" "date"
## [6] "round" "ateam" "bits" "year" "confidence"
## [11] "updated" "tipteamid" "gameid" "ateamid" "err"
## [16] "sourceid" "margin" "source" "hconfidence" "hteam"
# Reduce the tip timestamps to calendar dates and parse the results' dates, so
# both tables carry a Date column that can serve as a join key.
tips <- tips %>%
  mutate(date = as.Date(ymd_hms(date)))
afltables <- afltables %>%
  mutate(Date = ymd(Date))

# Attach the match result to each tip, keyed on home team and match date.
# A left join keeps every tip, including those with no matching result.
joined_dataset <- tips %>%
  left_join(afltables, by = c("hteam" = "Home.Team", "date" = "Date"))
# Build the modelling frame: one row per "PlusSixOne" tip with the tipped and
# the actual margin, both expressed from the home team's point of view.
df <- joined_dataset %>%
  filter(source == "PlusSixOne") %>%
  mutate(
    # The code treats `margin` as the tipped team's margin, so it is negated
    # whenever the tipped team is not the home team.
    squigglehomemargin = if_else(hteam == tip, margin, -margin),
    actualhomemargin = Home.Points - Away.Points,
    # Rescale by 100 (presumably from a percentage to a proportion).
    hconfidence = hconfidence / 100
  ) %>%
  select(
    round, hteam, ateam, hconfidence,
    squigglehomemargin, actualhomemargin, correct
  )

# Keep only rows with no missing values (e.g. tips without a joined result).
df <- df[complete.cases(df), ]

# Team names become factors.
team_cols <- c("hteam", "ateam")
df[team_cols] <- lapply(df[team_cols], as.factor)
# Calibration check on the log-odds scale: model the probability of a home win
# as a smooth function of the tipped home-win log-odds, then plot the fitted
# probability against the tipped confidence.
#
# The fit on raw hconfidence that used to precede this was overwritten before
# being used, so it has been dropped; the "predicting winners" section fits
# that model.

# hconfidence is a proportion here (it was divided by 100 when df was built),
# so the log-odds are log(p / (1 - p)), i.e. qlogis(p).
# NOTE(review): a confidence of exactly 0 or 1 gives -Inf/Inf and would make
# gam() fail -- confirm the source never reports those values.
df$logitChance <- qlogis(df$hconfidence)
ft <- gam(I(actualhomemargin > 0) ~ s(logitChance),
          data = df, family = "binomial")
preds <- predict(ft, type = "response", se.fit = TRUE)

# Order by the x variable (tipped confidence) so the curve and its bands are
# drawn left to right even if the fitted smooth is not monotone.
conf_order <- order(df$hconfidence)
conf_sorted <- df$hconfidence[conf_order]
fit_sorted <- preds$fit[conf_order]
se_sorted <- preds$se.fit[conf_order]

plot(fit_sorted ~ conf_sorted, col = "red", type = "l",
     xlab = "tipped home-win confidence",
     ylab = "fitted home-win probability")
abline(h = 0.5, col = "blue")
# The x axis is on the 0-1 scale, so the even-money reference is 0.5.
abline(v = 0.5, col = "blue")
# Perfect calibration: fitted probability equals tipped confidence.
abline(c(0, 1), col = "purple")
# Approximate +/- 2 standard error bands on the response scale.
lines(conf_sorted, fit_sorted + 2 * se_sorted)
lines(conf_sorted, fit_sorted - 2 * se_sorted)
# predicting winners
# the 0.05 was to make it a bit wiggly but not too silly (the default was not
# monotonically increasing, which is silly)
ft <- gam(I(actualhomemargin > 0) ~ s(hconfidence),
          data = df, family = "binomial", sp = 0.05)
# plot.gam draws the centred smooth, which excludes the model intercept.
# Add the intercept back with `shift` (applied before `trans`) so that the
# inverse link yields actual win probabilities that can be compared with the
# y = x calibration line below.
plot(ft, rug = FALSE, shift = coef(ft)[1], trans = binomial()$linkinv)
abline(h = 0.5, col = "blue")
abline(v = 0.5, col = "blue")
# Perfect calibration: fitted probability equals tipped confidence.
abline(c(0, 1), col = "purple")
# predicting margins
ft <- gam(actualhomemargin ~ s(hconfidence), data = df)
# plot.gam draws the centred smooth; shifting by the intercept puts the curve
# and its partial residuals on the actual margin scale, so the raw Squiggle
# margins added below are directly comparable.
# `residuals` is spelled out in full rather than relying on partial matching.
plot(ft, residuals = TRUE, rug = FALSE, shift = coef(ft)[1],
     pch = 1, cex = 0.4)
# Reference lines: a zero margin and an even-money (0.5) confidence.
abline(h = 0, col = "blue")
abline(v = 0.5, col = "blue")
# add squiggle margins to the plot, ordered by confidence so the line runs
# left to right
conf_order <- order(df$hconfidence)
lines(df$hconfidence[conf_order], df$squigglehomemargin[conf_order],
      col = "purple")