Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
This example will be using Betfair which we hope to add to fitzRoy in the future. Making it easier to compare Squiggle and maybe your own models vs the market!
Step One Read in the Betfair Data
The first step is just go to the betfair site and download the AFL file with the data we need.
I will only be joining the 2017-onwards data because that’s what squiggle has!
library(readxl) library(tidyverse) ## ── Attaching packages ───────────────────────────────────────────────────────── tidyverse 1.2.1 ── ## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5 ## ✔ tibble 2.0.1 ✔ dplyr 0.7.8 ## ✔ tidyr 0.8.2 ✔ stringr 1.3.1 ## ✔ readr 1.3.1 ✔ forcats 0.3.0 ## ── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ── ## ✖ dplyr::filter() masks stats::filter() ## ✖ dplyr::lag() masks stats::lag() filetodownload <- "https://www.betfair.com.au/hub/wp-content/uploads/sites/2/2018/10/Copy-of-Weekly-AFL-Data-Dump-2017-YTD-2.xlsx" download.file(filetodownload,"aflodds.xlsx",mode="wb") Copy_of_Weekly_AFL_Data_Dump_2017_YTD <- read_excel("aflodds.xlsx", col_types = c("date", "text", "numeric", "text", "text", "text", "text", "numeric", "text", "text", "text", "text", "text", "text", "text", "text")) ## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i = ## sheet, : Expecting date in B4 / R4C2: got 'SETT_DATE' ## New names: ## * `` -> `..1` ## * `` -> `..2` ## * `` -> `..4` ## * `` -> `..5` ## * `` -> `..6` ## * … and 10 more dim(Copy_of_Weekly_AFL_Data_Dump_2017_YTD) ## [1] 1610 16 head(Copy_of_Weekly_AFL_Data_Dump_2017_YTD) ## # A tibble: 6 x 16 ## ..1 ..2 `AFL 2017 - YTD` ..4 ..5 ..6 ..7 ..8 ## <dttm> <chr> <dbl> <chr> <chr> <chr> <chr> <dbl> ## 1 NA <NA> NA <NA> <NA> <NA> <NA> NA ## 2 NA EVEN… NA MENU… EVEN… PATHS PARE… NA ## 3 2017-03-23 00:00:00 1281… NA AFL … Matc… AFL … Aust… NA ## 4 2017-03-23 00:00:00 1281… NA AFL … Matc… AFL … Aust… NA ## 5 2017-03-23 00:00:00 1281… NA AFL … Matc… AFL … Aust… NA ## 6 2017-03-23 00:00:00 1281… NA AFL … Matc… AFL … Aust… NA ## # … with 8 more variables: ..9 <chr>, ..10 <chr>, ..11 <chr>, ..12 <chr>, ## # ..13 <chr>, ..14 <chr>, ..15 <chr>, ..16 <chr> df_bookie<-slice(Copy_of_Weekly_AFL_Data_Dump_2017_YTD,2:1430) names(df_bookie) <- lapply(df_bookie[1, ], as.character) ## Warning: Must use a character vector as names. ## This warning is displayed once per session. names(df_bookie) ## [1] NA "EVENT_ID" NA ## [4] "MENU_HINT" "EVENT_NAME" "PATHS" ## [7] "PARENT_MENU_HINT" NA "PARENT_EVENT_NAME" ## [10] "SELECTION_NAME" "INPLAY" "MIN_PRICE" ## [13] "MAX_PRICE" "LAST_MATCHED_PRICE" "WAP" ## [16] "VOLUME_AUD"
The columns of interest to join on would be, the Date, the selection name and WAP and we want to maybe join this on to the squiggle guys. The next thing we notice by viewing the dataset View(Copy_of_Weekly_AFL_Data_Dump_2017_YTD)
is that if we look at the column INPLAY
we want when it is N
the reason being is that we want to compare the squiggle pre game tips with what the market was doing pre game.
Step Two Get the squiggle data
library(fitzRoy) tips <- get_squiggle_data("tips") ## Getting data from https://api.squiggle.com.au/?q=tips df<-tips%>%mutate(home.margin=ifelse(hteam==tip, margin,-margin))%>% mutate(away.margin=ifelse(ateam==tip, margin,-margin)) %>% select(source,date,correct, hconfidence,hteam, ateam,home.margin,away.margin,err ,tip,round, year) df1<-select(df,source, date, correct, hconfidence,hteam, home.margin, err, tip, round, year ) df1$H_A<-"HOME" df2<-select(df, source, date, correct, hconfidence, ateam, away.margin, err, tip, round, year) df2$H_A<-"AWAY" colnames(df1)[5]<-"Team" colnames(df1)[6] <- "margin" colnames(df2)[5]<-"Team" colnames(df2)[6]<-"margin" df3<-rbind(df1,df2) colnames(df3)[2]<-"Date"
Step Three make sure the names of columns align!
colnames(df_bookie)[1] <- "Date" colnames(df_bookie)[10] <- "Team" colnames(df_bookie)[3]<-"x3" colnames(df_bookie)[8]<-"x8" df_bookie<-df_bookie%>%filter(INPLAY=="N")%>% select(Date, Team, WAP) df_bookie$Date<-as.Date(df_bookie$Date) df3$Date<-as.Date(df3$Date)
Step 4 Check what you are joining by!
Before we join on the datasets, we need to make sure that our IDs line up, so that our teams are named the same in both datasets.
unique(df_bookie$Team) ## [1] "Carlton" "Richmond" ## [3] "Carlton +14.5pts" "Richmond -14.5pts" ## [5] "Collingwood +14.5pts" "Western Bulldogs -14.5pts" ## [7] "Fremantle" "Geelong Cats" ## [9] "Fremantle +8.5pts" "Geelong -8.5pts" ## [11] "Sydney Swans" "Western Bulldogs" ## [13] "Sydney +22.5pts" "Western Bulldogs -22.5pts" ## [15] "Adelaide Crows" "Hawthorn" ## [17] "Brisbane Lions" "Essendon" ## [19] "Brisbane +15.5pts" "Essendon -15.5pts" ## [21] "St Kilda +38.5pts" "West Coast -38.5pts" ## [23] "North Melbourne" "Melbourne Demons" ## [25] "Carlton +29.5pts" "Melbourne -29.5pts" ## [27] "Fremantle +28.5pts" "Port Adelaide -28.5pts" ## [29] "Collingwood" "Collingwood +12.5pts" ## [31] "Sydney -12.5pts" "West Coast Eagles" ## [33] "Richmond +12.5pts" "West Coast -12.5pts" ## [35] "Adelaide -8.5pts" "Port Adelaide +8.5pts" ## [37] "St Kilda" "Carlton +17.5pts" ## [39] "Essendon -17.5pts" "Gold Coast Suns" ## [41] "Gold Coast +20.5pts" "Hawthorn -20.5pts" ## [43] "Sydney +20.5pts" "West Coast -20.5pts" ## [45] "North Melbourne +23.5pts" "Western Bulldogs -23.5pts" ## [47] "Fremantle +22.5pts" "Melbourne -22.5pts" ## [49] "GWS Giants" "Port Adelaide Power" ## [51] "GWS -17.5pts" "Port Adelaide +17.5pts" ## [53] "Adelaide -35.5pts" "Essendon +35.5pts" ## [55] "Collingwood -6.5pts" "St Kilda +6.5pts" ## [57] "Brisbane +17.5pts" "Richmond -17.5pts" ## [59] "Geelong -14.5pts" "Hawthorn +14.5pts" ## [61] "Carlton +52.5pts" "Port Adelaide -52.5pts" ## [63] "Adelaide -27.5pts" "Gold Coast +27.5pts" ## [65] "GWS -13.5pts" "Sydney +13.5pts" ## [67] "Fremantle -6.5pts" "North Melbourne +6.5pts" ## [69] "Geelong -15.5pts" "St Kilda +15.5pts" ## [71] "Hawthorn +16.5pts" "West Coast -16.5pts" ## [73] "Melbourne +5.5pts" "Richmond -5.5pts" ## [75] "GWS -24.5pts" "Western Bulldogs +24.5pts" ## [77] "Carlton +30.5pts" "Sydney -30.5pts" ## [79] "Brisbane +40.5pts" "Port Adelaide -40.5pts" ## [81] "Gold Coast +14.5pts" "North Melbourne -14.5pts" ## [83] "Fremantle +25.5pts" "West Coast -25.5pts" ## [85] "Collingwood +26.5pts" "Geelong -26.5pts" ## [87] "Adelaide -39.5pts" "Richmond +39.5pts" ## [89] "GWS -14.5pts" "St Kilda +14.5pts" ## [91] "Adelaide -37.5pts" "North Melbourne +37.5pts" ## [93] "Geelong -16.5pts" "Gold Coast +16.5pts" ## [95] "Richmond +26.5pts" "Western Bulldogs -26.5pts" ## [97] "Brisbane +49.5pts" "Sydney -49.5pts" ## [99] "Melbourne -14.5pts" "Western Bulldogs +16.5pts" ## [101] "Collingwood +31.5pts" "GWS -31.5pts" ## [103] "Essendon +27.5pts" "Geelong -27.5pts" ## [105] "Adelaide -38.5pts" "Melbourne +38.5pts" ## [107] "North Melbourne -7.5pts" "Sydney +7.5pts" ## [109] "Port Adelaide -27.5pts" "Wetsern Bulldogs" ## [111] "Geelong +3.5pts" "Western Bulldogs -3.5pts" ## [113] "Collingwood -12.5pts" "Hawthorn +12.5pts" ## [115] "Essendon +11.5pts" "West Coast -11.5pts" ## [117] "Geelong -9.5pts" "Port Adelaide +9.5pts" ## [119] "Hawthorn +40.5pts" "Sydney -40.5pts" ## [121] "St Kilda +15.5" "Western Bulldogs -15.5pts" ## [123] "Essendon +4.5pts" "Richmond -4.5pts" ## [125] "Fremantle +35.5pts" "Brisbane +51.5pts" ## [127] "Collingwood -51.5pts" "Carlton +21.5pts" ## [129] "North Melbourne -21.5pts" "GWS +12.5pts" ## [131] "Hawthorn +42.5pts" "Port Adelaide -42.5pts" ## [133] "Adelaide -6.5pts" "Geelong +6.5pts" ## [135] "Gold Coast +8.5pts" "West Coast -8.5pts" ## [137] "Essendon +29.5pts" "GWS -29.5pts" ## [139] "Sydney -7.5pts" "Western Bulldogs +7.5pts" ## [141] "Adelaide -36.5pts" "St Kilda +36.5pts" ## [143] "Essendon +25.5pts" "Port Adelaide -25.5pts" ## [145] "Brisbane +7.5pts" "Fremantle -7.5pts" ## [147] "Carlton +25.5pts" "GWS -25.5pts" ## [149] "Geelong -5.5pts" "West Coast +5.5pts" ## [151] "Adelaide -48.5pts" "Hawthorn +48.5pts" ## [153] "Fremantle +57.5pts" "Geelong -57.5pts" ## [155] "Gold Coast +37.5pts" "St Kilda -37.5pts" ## [157] "West Coast +25.5pts" "Western Bulldogs -25.5pts" ## [159] "Adelaide -24.5pts" "Carlton +24.5pts" ## [161] "Gold Coast -10.5pts" "North Melbourne +10.5pts" ## [163] "Geelong +21.5pts" "GWS -21.5pts" ## [165] "Collingwood -9.5pts" "Hawthorn +9.5pts" ## [167] "Fremantle +11.5pts" "St Kilda -11.5pts" ## [169] "Brisbane +44.5pts" "Essendon -44.5pts" ## [171] "Gold Coast +45.5pts" "Sydney -45.5pts" ## [173] "Brisbane +30.5pts" "Geelong -30.5pts" ## [175] "Fremantle +16.5pts" "North Melbourne -16.5pts" ## [177] "Essendon +5.5pts" "St Kilda -5.5pts" ## [179] "North Melbourne +46.5pts" "Port Adelaide -46.5pts" ## [181] "Fremantle +26.5pts" "West Coast -26.5pts" ## [183] "Essendon -34.5pts" "North Melbourne +34.5pts" ## [185] "Melbourne +9.5pts" "Port Adelaide -9.5pts" ## [187] "St Kilda +31.5pts" "Sydney -31.5pts" ## [189] "Sydney -14.5pts" "Melbourne -19.5pts" ## [191] "North Melbourne +19.5pts" "Port Adelaide -37.5pts" ## [193] "St Kilda +37.5pts" "Gold Coast +15.5pts" ## [195] "Richmond -15.5pts" "Essendon -5.5pts" ## [197] "Western Bulldogs +5.5pts" "Adelaide -12.5pts" ## [199] "Brisbane +54.5pts" "West Coast -54.5pts" ## [201] "Melbourne +13.5pts" "Carlton +26.5pts" ## [203] "Essendon -26.5pts" "Brisbane +29.5pts" ## [205] "Western Bulldogs -29.5pts" "Collingwood -19.5pts" ## [207] "Fremantle -12.5pts" "Gold Coast +12.5pts" ## [209] "Adelaide -11.5pts" "Port Adelaide +11.5pts" ## [211] "GWS -5.5pts" "Geelong +8.5pts" ## [213] "Richmond -8.5pts" "Adelaide -28.5pts" ## [215] "Essendon +28.5pts" "Carlton +42.5pts" ## [217] "West Coast -42.5pts" "Hawthorn -16.5pts" ## [219] "North Melbourne +16.5pts" "Collingwood +22.5pts" ## [221] "Port Adelaide -22.5pts" "Sydney +11.5pts" ## [223] "Essendon -20.5pts" "Brisbane +45.5pts" ## [225] "Melbourne -45.5pts" "Richmond -26.5pts" ## [227] "Hawthorn +3.5pts" "Collingwood +20.5pts" ## [229] "Melbourne -20.5pts" "Gold Coast +56.5pts" ## [231] "Port Adelaide -56.5pts" "Essendon -42.5pts" ## [233] "Fremantle +42.5pts" "Adelaide +2.5pts" ## [235] "West Coast -2.5pts" "Essendon +37.5pts" ## [237] "Sydney -37.5pts" "Geelong +16.5pts" ## [239] "Sydney -16.5pts" "GWS -16.5pts" ## [241] "West Coast +16.5pts" "GWS +9.5pts" ## [243] "Richmond -9.5pts" "Richmond -30.5pts" ## [245] "Brisbane +24.5pts" "St Kilda -24.5pts" ## [247] "Western Bullbogs" "GWS Giants -17.5pts" ## [249] "Western Bulldogs +17.5pts" "St Kilda -19.5pts" ## [251] "Gold Coast" "Carlton -13.5pts" ## [253] "Gold Coast +13.5pts" "Brisbane" ## [255] "Melbourne" "Brisbane +18.5pts" ## [257] "Melbourne -18.5pts" "Essendon -11.5pts" ## [259] "West Coast" "Port Adelaide" ## [261] "Sydney" "Port Adelaide +16.5pts" ## [263] "Geelong" "Hawthorn +8.5pts" ## [265] "Carlton +11.5pts" "Collingwood -11.5pts" ## [267] "Melbourne -21.5pts" "North Melbourne +21.5pts" ## [269] "Fremantle -11.5pts" "Gold Coast +11.5pts" ## [271] "Adelaide" "Adelaide -20.5pts" ## [273] "St Kilda +20.5pts" "Brisbane +52.5pts" ## [275] "Adelaide -31.5pts" "GWS" ## [277] "Fremantle +32.5pts" "GWS -32.5pts" ## [279] "Brisbane +42.5pts" "Richmond -42.5pts" ## [281] "Carlton +13.5pts" "North Melbourne -13.5pts" ## [283] "Gold Coast +36.5pts" "West Coast -36.5pts" ## [285] "Geelong -29.5pts" "St Kilda +29.5pts" ## [287] "Adelaide +26.5pts" "Sydney -26.5pts" ## [289] "Carlton +33.5pts" "West Coast -33.5pts" ## [291] "Geelong +14.5pts" "Port Adelaide -14.5pts" ## [293] "Fremantle -10.5pts" "Western Bulldogs +10.5pts" ## [295] "Hawthorn -12.5pts" "North Melbourne +12.5pts" ## [297] "Brisbane -6.5pts" "Gold Coast +6.5pts" ## [299] "Richmond -13.5pts" "Collingwood -5.5pts" ## [301] "Western Bulldogs -17.5pts" "Brisbane +38.5pts" ## [303] "GWS -38.5pts" "St Kilda +16.5pts" ## [305] "Adelaide -45.5pts" "North Melbourne +7.5pts" ## [307] "Port Adelaide -7.5pts" "Essendon +3.5pts" ## [309] "Melbourne -3.5pts" "Collingwood +13.5pts" ## [311] "Geelong -17.5pts" "GWS +17.5pts" ## [313] "Gold Coast +28.5pts" "Western Bulldogs -28.5pts" ## [315] "Essendon +13.5pts" "Hawthorn -13.5pts" ## [317] "Port Adelaide +4.5pts" "West Coast -4.5pts" ## [319] "North Melbourne +29.5pts" "Sydney -29.5pts" ## [321] "Adelaide -55.5pts" "Carlton +55.5pts" ## [323] "Richmond -35.5pts" "St Kilda +18.5pts" ## [325] "Brisbane +22.5pts" "Collingwood -22.5pts" ## [327] "Gold Coast +23.5pts" "Melbourne -23.5pts" ## [329] "North Melbourne +26.5pts" "Collingwood +9.5pts" ## [331] "Adelaide -23.5pts" "Western Bulldogs +23.5pts" ## [333] "Fremantle +34.5pts" "Sydney -34.5pts" ## [335] "Collingwood -25.5pts" "St Kilda +25.5pts" ## [337] "GWS +14.5pts" "Carlton +38.5pts" ## [339] "Melbourne -38.5pts" "Hawthorn -15.5pts" ## [341] "Richmond -2.5pts" "West Coast +2.5pts" ## [343] "Richmond -44.5pts" "St Kilda +44.5pts" ## [345] "Brisbane +19.5pts" "Sydney -19.5pts" ## [347] "Carlton +49.5pts" "Geelong -49.5pts" ## [349] "Adelaide +5.5pts" "Melbourne -5.5pts" ## [351] "Sydney -52.5pts" "Melbourne -37.5pts" ## [353] "Western Bulldogs +37.5pts" "Port Adelaide -3.5pts" ## [355] "Essendon +22.5pts" "Richmond -22.5pts" ## [357] "Brisbane +27.5pts" "North Melbourne -27.5pts" ## [359] "Adelaide -21.5pts" "GWS +21.5pts" ## [361] "Collingwood -39.5pts" "Fremantle +39.5pts" ## [363] "Port Adelaide -6.5pts" "Richmond +6.5pts" ## [365] "North Melbourne +15.5pts" "Gold Coast +46.5pts" ## [367] "GWS -46.5pts" "St Kilda +26.5pts" ## [369] "Fremantle +23.5pts" "Collingwood +10.5pts" ## [371] "Melbourne -10.5pts" "Sydney -13.5pts" ## [373] "West Coast +13.5pts" "Carlton +4.5pts" ## [375] "Fremantle -4.5pts" "Gold Coast +10.5pts" ## [377] "St Kilda -10.5pts" "Adelaide +14.5pts" ## [379] "Hawthorn -14.5pts" "Essendon +26.5pts" ## [381] "Melbourne +7.5pts" "Brisbane +20.5pts" ## [383] "GWS -20.5pts" "Western Bulldogs +27.5pts" ## [385] "Gold Coast +52.5pts" "Hawthorn -52.5pts" ## [387] "Carlton +47.5pts" "Collingwood -47.5pts" ## [389] "Richmond -12.5pts" "Sydney +12.5pts" ## [391] "Geelong -33.5pts" "Western Bulldogs +33.5pts" ## [393] "Carlton +35.5pts" "Port Adelaide -35.5pts" ## [395] "Collingwood -42.5pts" "Gold Coast +42.5pts" ## [397] "GWS -8.5pts" "Adelaide -14.5pts" ## [399] "West Coast +14.5pts" "Melbourne -39.5pts" ## [401] "St Kilda +39.5pts" "Fremantle -24.5pts" ## [403] "Geelong +7.5pts" "Adelaide +27.5pts" ## [405] "Richmond -27.5pts" "Brisbane -18.5pts" ## [407] "Carlton +18.5pts" "Essendon +9.5pts" ## [409] "Adelaide -2.5pts" "Geelong +2.5pts" ## [411] "Carlton +22.5pts" "St Kilda -22.5pts" ## [413] "Hawthorn -27.5pts" "Melbourne -42.5pts" ## [415] "Western Bulldogs +42.5pts" "GWS +11.5pts" ## [417] "Richmond -11.5pts" "Collingwood -14.5pts" ## [419] "Fremantle +29.5pts" "Port Adelaide -29.5pts" ## [421] "Richmond -34.5pts" "St Kilda +34.5pts" ## [423] "Adelaide -9.5pts" "Brisbane +9.5pts" ## [425] "Geelong -11.5pts" "Melbourne +11.5pts" ## [427] "Hawthorn -38.5pts" "GWS +8.5pts" ## [429] "Port Adelaide -8.5pts" "Essendon -3.5pts" ## [431] "Sydney +3.5pts" "Brisbane +31.5pts" ## [433] "Geelong -31.5pts" "GWS -39.5pts" ## [435] "Carlton +20.5pts" "Gold Coast -20.5pts" ## [437] "North Melbourne -8.5pts" "West Coast +8.5pts" ## [439] "Western Bulldogs +22.5pts" "Geelong +18.5pts" ## [441] "Richmond -18.5pts" "St Kilda -7.5pts" ## [443] "Brisbane -4.5pts" "North Melbourne +4.5pts" unique(df3$Team) ## [1] "Carlton" "Collingwood" ## [3] "Adelaide" "St Kilda" ## [5] "Sydney" "Gold Coast" ## [7] "Essendon" "North Melbourne" ## [9] "Fremantle" "Richmond" ## [11] "Western Bulldogs" "Hawthorn" ## [13] "Greater Western Sydney" "West Coast" ## [15] "Brisbane Lions" "Geelong" ## [17] "Melbourne" "Port Adelaide"
From here we can see that not all the team names are aligned so we need to fix them up, as an example we have the Adelaide Crows in 2017 and Adelaide in 2018. When we try and join to Squiggle we will miss out some games in 2017 because its trying to match Adelaide (from squiggle) with Adelaide Crows from betfair.
To confirm this we can use below
df_bookie%>%filter(Team %in% c("Adelaide Crows", "Adelaide")) ## # A tibble: 29 x 3 ## Date Team WAP ## <date> <chr> <chr> ## 1 2017-04-01 Adelaide Crows 1.8413133095902039 ## 2 2017-04-15 Adelaide Crows 1.1876003204498842 ## 3 2017-04-22 Adelaide Crows 1.2818905566923648 ## 4 2017-05-06 Adelaide Crows 1.1829114641026119 ## 5 2017-05-27 Adelaide Crows 1.19012452955724 ## 6 2017-06-02 Adelaide Crows 1.817133213487137 ## 7 2017-06-09 Adelaide Crows 1.1764993689503755 ## 8 2017-06-22 Adelaide Crows 1.116772609240404 ## 9 2017-07-30 Adelaide Crows 1.5478971535189769 ## 10 2017-08-18 Adelaide Crows 1.6920023163626901 ## # … with 19 more rows inner_join(df_bookie,df3, by=c("Team","Date"))%>% filter(Team %in% c("Adelaide Crows", "Adelaide")) ## # A tibble: 195 x 12 ## Date Team WAP source correct hconfidence margin err tip ## <date> <chr> <chr> <chr> <int> <dbl> <dbl> <dbl> <chr> ## 1 2018-04-07 Adel… 1.36… Squig… 1 31 24 25 Adel… ## 2 2018-04-07 Adel… 1.36… Aggre… 1 33.7 16.7 32.3 Adel… ## 3 2018-04-07 Adel… 1.36… The A… 1 34.8 14 35 Adel… ## 4 2018-04-07 Adel… 1.36… Punte… 1 27.1 NA NA Adel… ## 5 2018-04-07 Adel… 1.36… Graft 1 29.1 21 28 Adel… ## 6 2018-04-07 Adel… 1.36… PlusS… 1 42.2 12 37 Adel… ## 7 2018-04-07 Adel… 1.36… Matte… 1 31.1 18.9 30.1 Adel… ## 8 2018-04-07 Adel… 1.36… Footy… 1 33 22 27 Adel… ## 9 2018-04-07 Adel… 1.36… Live … 1 38.2 11 38 Adel… ## 10 2018-04-07 Adel… 1.36… Statt… 1 40.3 11 38 Adel… ## # … with 185 more rows, and 3 more variables: round <int>, year <int>, ## # H_A <chr>
So lets just fix up the team names
df_bookie<-df_bookie %>% mutate(Team=replace(Team,Team=="Adelaide Crows", "Adelaide")) %>% mutate(Team=replace(Team,Team=="Sydney Swans", "Sydney")) %>% mutate(Team=replace(Team,Team=="Gold Coast Suns", "Gold Coast")) %>% mutate(Team=replace(Team,Team=="GWS Giants", "Greater Western Sydney")) %>% mutate(Team=replace(Team,Team=="GWS", "Greater Western Sydney")) %>% mutate(Team=replace(Team,Team=="West Coast Eagles", "West Coast")) %>% mutate(Team=replace(Team,Team=="Brisbane", "Brisbane Lions")) %>% mutate(Team=replace(Team,Team=="Geelong Cats", "Geelong")) %>% mutate(Team=replace(Team,Team=="Melbourne Demons", "Melbourne")) %>% as.data.frame() df_joined<-left_join(df3,df_bookie, by=c("Team","Date"))
Step 5 Check the Join
So remember what we wanted originally, we wanted to join betting data to the squiggle data. Our squiggle data was a dataframe called df3
and our bookie data was a dataframe called df_bookie
. One way to check our join, is to make sure the dimension lines up.
We do this using dim
.
dim(df3) ## [1] 10734 11 dim(df_bookie) ## [1] 956 3 dim(df_joined) ## [1] 10734 12
Our dimension for df3 is 7842 rows and 11 columns, and our joined data df_joined
is 7842 rows and 12 columns, i.e. it is the same as df3 (squiggle data) but has an extra column (betfair odds)
So it seems as though we have a clean dataset to analyse. Now we just got to go ahead and do it!
A quick analysis
Someone asked me who do you think tips underdogs the best?
Well if you just wanted to know who has most winning underdog ‘bets’ (I don’t know if any of the squigglers bet).
df_joined%>%filter(WAP>2 &correct ==1 & Team==tip)%>% group_by(source)%>% summarise(count=n())%>% arrange(desc(count)) ## # A tibble: 14 x 2 ## source count ## <chr> <int> ## 1 HPN 12 ## 2 Footy Maths Institute 11 ## 3 Graft 10 ## 4 Stattraction 10 ## 5 Massey Ratings 9 ## 6 Figuring Footy 7 ## 7 Matter of Stats 7 ## 8 PlusSixOne 7 ## 9 Squiggle 7 ## 10 The Arc 7 ## 11 Aggregate 6 ## 12 Live Ladders 5 ## 13 Swinburne 3 ## 14 Punters 1
That tells me Footy Maths institute had the most winning underdog bets, but just how many times did they have to bet?
df_joined%>%filter(WAP>2 & Team==tip)%>% group_by(source, correct)%>% summarise(count=n())%>% group_by(source)%>% mutate(total_bets=sum(count), pert=count/total_bets)%>% filter(correct==1)%>% arrange(desc(pert)) ## # A tibble: 14 x 5 ## # Groups: source [14] ## source correct count total_bets pert ## <chr> <int> <int> <int> <dbl> ## 1 Punters 1 1 1 1 ## 2 Figuring Footy 1 7 11 0.636 ## 3 Massey Ratings 1 9 16 0.562 ## 4 Aggregate 1 6 12 0.5 ## 5 Footy Maths Institute 1 11 22 0.5 ## 6 HPN 1 12 24 0.5 ## 7 Squiggle 1 7 15 0.467 ## 8 Graft 1 10 22 0.455 ## 9 Live Ladders 1 5 11 0.455 ## 10 Stattraction 1 10 26 0.385 ## 11 Matter of Stats 1 7 19 0.368 ## 12 PlusSixOne 1 7 19 0.368 ## 13 The Arc 1 7 19 0.368 ## 14 Swinburne 1 3 14 0.214
Note we can automatically download the AFL file using the following script.
library(readxl) filetodownload <- "http://www.aussportsbetting.com/historical_data/afl.xlsx" download.file(filetodownload,"aflodds.xlsx",mode="wb") res <-read_excel("aflodds.xlsx", sheet = 1,skip = 1)
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.