Site icon R-bloggers

Maths in Sport Script

[This article was first published on Analysis of AFL, and kindly contributed to R-bloggers]. (You can report an issue about the content on this page here.)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
############################################################################
#BROWNLOW PREDICTION WITH FREE DATA!!!!

        ################
        ### 

##from fitzRoy figures
# ptm <- proc.time()
library(tidyverse)
# Player-level match statistics from fitzRoy's afltables helper, requested
# from 1897-01-01 up to today.
df <- fitzRoy::get_afltables_stats(start_date = "1897-01-01", end_date = Sys.Date())
names(df)
# df<-afldata::afldata

# Team totals: one row per team per match, summing every numeric statistic.
# The *named* list makes dplyr suffix each output column with "_sum"
# (Kicks -> Kicks_sum, ...), which the ratio step further down relies on.
# funs() has been deprecated since dplyr 0.8.0; a named list of functions is
# its drop-in replacement and produces the same column names.
team_stats <- df %>%
  dplyr::select(Date, First.name, Surname, Season, Round, Playing.for, Kicks:Goal.Assists) %>%
  group_by(Date, Season, Round, Playing.for) %>%
  summarise_if(is.numeric, list(sum = sum))

# The same columns at player level (no aggregation).
player_stats <- df %>%
  dplyr::select(Date, First.name, Surname, Season, Round, Playing.for, Kicks:Goal.Assists)

# Attach the team totals to every player row. Round is not part of the join
# key, so it comes through twice, as Round.x (player side) and Round.y (team
# side); later code refers to Round.x.
complete_df <- left_join(
  player_stats,
  team_stats,
  by = c("Date" = "Date", "Season" = "Season", "Playing.for" = "Playing.for")
)

# Match margins are needed as a predictor too: reshape the match results to
# one row per team per game and attach that team's signed margin.

dataset_scores <- fitzRoy::match_results
names(dataset_scores)

# Home and away halves, given identical column names so they can be stacked.
dataset_scores1 <- dataset_scores %>%
  dplyr::select(Date, Round, Team = Home.Team, Points = Home.Points, Game)
dataset_scores2 <- dataset_scores %>%
  dplyr::select(Date, Round, Team = Away.Team, Points = Away.Points, Game)

df5 <- rbind(dataset_scores1, dataset_scores2)

# Within each game diff(Points) is (second row - first row); multiplying by
# c(-1, 1) gives the first row its own margin and the second row the opposite
# sign. Assumes exactly two rows per Game, with the home row first because
# the home half was stacked first.
dataset_margins <- df5 %>%
  group_by(Game) %>%
  arrange(Game) %>%
  mutate(margin = diff(Points) * c(-1L, 1L))
# View(dataset_margins)

# Both sides of the join need Date stored as a Date.
dataset_margins$Date <- as.Date(dataset_margins$Date)
complete_df$Date <- as.Date(complete_df$Date)

complete_df <- left_join(
  complete_df,
  dataset_margins,
  by = c("Date", "Playing.for" = "Team")
)


# Express every player statistic as a share of the team's total for that
# match (player value / team "_sum" value).
complete_df_ratio <- complete_df %>%
  mutate(
    kick.ratio                    = Kicks / Kicks_sum,
    Marks.ratio                   = Marks / Marks_sum,
    handball.ratio                = Handballs / Handballs_sum,
    Goals.ratio                   = Goals / Goals_sum,
    behinds.ratio                 = Behinds / Behinds_sum,
    hitouts.ratio                 = Hit.Outs / Hit.Outs_sum,
    tackles.ratio                 = Tackles / Tackles_sum,
    rebounds.ratio                = Rebounds / Rebounds_sum,
    inside50s.ratio               = Inside.50s / Inside.50s_sum,
    clearances.ratio              = Clearances / Clearances_sum,
    clangers.ratio                = Clangers / Clangers_sum,
    freefors.ratio                = Frees.For / Frees.For_sum,
    freesagainst.ratio            = Frees.Against / Frees.Against_sum,
    Contested.Possessions.ratio   = Contested.Possessions / Contested.Possessions_sum,
    Uncontested.Possessions.ratio = Uncontested.Possessions / Uncontested.Possessions_sum,
    contested.marks.ratio         = Contested.Marks / Contested.Marks_sum,
    marksinside50.ratio           = Marks.Inside.50 / Marks.Inside.50_sum,
    one.percenters.ratio          = One.Percenters / One.Percenters_sum,
    bounces.ratio                 = Bounces / Bounces_sum,
    goal.assists.ratio            = Goal.Assists / Goal.Assists_sum,
    disposals.ratio               = (Kicks + Handballs) / (Kicks_sum + Handballs_sum)
  )

# Column order: identifiers first, then Brownlow.Votes_sum, then everything
# else; the second select() moves Brownlow.Votes (the response) to the very
# end so that a later margin:Brownlow.Votes range picks up all predictors.
df <- complete_df_ratio %>%
  dplyr::select(
    Date, First.name, Surname, Season, Round.x, Playing.for,
    Brownlow.Votes_sum, everything()
  ) %>%
  dplyr::select(-Brownlow.Votes, everything())

# Missing values (including NaN from 0/0 ratios) become 0.
df[is.na(df)] <- 0
# Training data: seasons 2013 to 2016.
in.sample <- subset(df, Season %in% 2013:2016)

# The response is treated as an ordered category, not a number.
in.sample$Brownlow.Votes <- factor(in.sample$Brownlow.Votes)

# Keep only rounds labelled "1" to "24"; any other round label is dropped.
in.sample <- in.sample %>%
  filter(Round.x %in% as.character(1:24))


names(in.sample)

# One display name per player, then keep the identifiers plus every column
# from margin through Brownlow.Votes (predictors and response).
in.sample <- in.sample %>%
  mutate(Player = paste(First.name, Surname)) %>%
  dplyr::select(Player, Date, Season, Round.x, Playing.for, margin:Brownlow.Votes)




library(ordinal)

# Cumulative link (ordinal regression) model of the votes category on each
# player's share of the team statistics plus the match margin.
fm1 <- clm(
  Brownlow.Votes ~ kick.ratio + handball.ratio + Marks.ratio +
    disposals.ratio + hitouts.ratio +
    freefors.ratio + freesagainst.ratio + tackles.ratio + Goals.ratio +
    behinds.ratio + Contested.Possessions.ratio +
    Uncontested.Possessions.ratio + clangers.ratio + contested.marks.ratio +
    marksinside50.ratio +
    clearances.ratio + rebounds.ratio + inside50s.ratio +
    one.percenters.ratio + bounces.ratio +
    goal.assists.ratio + margin,
  data = in.sample
)

# NOTE: attaching MASS after the tidyverse masks dplyr::select(); this is why
# select() is namespace-qualified as dplyr::select() throughout the script.
library(MASS)

# Backward elimination by AIC. The original call also passed `type = AIC`,
# which is not a stepAIC() argument: it was silently absorbed by `...` and had
# no effect, so it is dropped. stepAIC() already uses the AIC penalty (k = 2)
# by default.
fm2 <- stepAIC(fm1, direction = "backward")

                      ####################
                    ###Get the out.sample

### Example using data from footywire to show that in fitzRoy
### fans have access to both popular websites

names(fitzRoy::player_stats)

# Scoring data: the 2017 season only.
df_2017 <- fitzRoy::player_stats %>%
  filter(Season == 2017)

# Team totals per match. The *named* list makes dplyr suffix each summed
# column with "_sum" (K -> K_sum, ...), which the ratio step relies on.
# funs() has been deprecated since dplyr 0.8.0; a named list of functions is
# its drop-in replacement and produces the same column names.
team_stats_out <- df_2017 %>%
  dplyr::select(Date, Player, Season, Round, Team, CP:T5) %>%
  group_by(Date, Season, Round, Team) %>%
  summarise_if(is.numeric, list(sum = sum))

# The same columns at player level (no aggregation).
player_stats_out <- df_2017 %>%
  dplyr::select(Date, Player, Season, Round, Team, CP:T5)

# Attach the team totals to every player row. Round is not part of the join
# key, so it comes through twice, as Round.x and Round.y; later code refers to
# Round.x.
complete_df_out <- left_join(
  player_stats_out,
  team_stats_out,
  by = c("Date" = "Date", "Season" = "Season", "Team" = "Team")
)



# Rebuild the per-team match margins and attach them to the scoring data.
dataset_scores <- fitzRoy::match_results
names(dataset_scores)

# Home and away halves, given identical column names so they can be stacked.
dataset_scores1 <- dataset_scores %>%
  dplyr::select(Date, Round, Team = Home.Team, Points = Home.Points, Game)
dataset_scores2 <- dataset_scores %>%
  dplyr::select(Date, Round, Team = Away.Team, Points = Away.Points, Game)

df5 <- rbind(dataset_scores1, dataset_scores2)

# Within each game diff(Points) is (second row - first row); multiplying by
# c(-1, 1) gives the first row its own margin and the second row the opposite
# sign. Assumes exactly two rows per Game, with the home row first because
# the home half was stacked first.
dataset_margins <- df5 %>%
  group_by(Game) %>%
  arrange(Game) %>%
  mutate(margin = diff(Points) * c(-1L, 1L))

# Both sides of the join need Date stored as a Date.
dataset_margins$Date <- as.Date(dataset_margins$Date)
complete_df_out$Date <- as.Date(complete_df_out$Date)

# Rewrite two team names in the match results before joining on Team.
# NOTE(review): presumably this aligns them with the names used in
# fitzRoy::player_stats - confirm against the data.
dataset_margins <- dataset_margins %>%
  mutate(Team = str_replace(Team, "Brisbane Lions", "Brisbane")) %>%
  mutate(Team = str_replace(Team, "Footscray", "Western Bulldogs"))

complete_df_out <- left_join(
  complete_df_out,
  dataset_margins,
  by = c("Date", "Team")
)

names(complete_df_out)

#### Create the new ratios
# Same predictors as the training data (player value / team total for the
# match), built from the abbreviated column names of fitzRoy::player_stats.
complete_df_ratio_out <- complete_df_out %>%
  mutate(
    kick.ratio = K / K_sum,
    Marks.ratio = M / M_sum,
    handball.ratio = HB / HB_sum,
    Goals.ratio = G / G_sum,
    behinds.ratio = B / B_sum,
    hitouts.ratio = HO / HO_sum,
    # T is the data column here, not the logical shorthand for TRUE.
    tackles.ratio = T / T_sum,
    rebounds.ratio = R50 / R50_sum,
    inside50s.ratio = I50 / I50_sum,
    # Clearances = centre clearances + stoppage clearances.
    clearances.ratio = (CCL + SCL) / (CCL_sum + SCL_sum),
    # Fix: the original used CL here, but in the footywire abbreviations CL is
    # clearances; clangers are CG. Using CL fed a clearance share to the model
    # in place of the clanger share it was trained on.
    clangers.ratio = CG / CG_sum,
    freefors.ratio = FF / FF_sum,
    freesagainst.ratio = FA / FA_sum,
    Contested.Possessions.ratio = CP / CP_sum,
    Uncontested.Possessions.ratio = UP / UP_sum,
    contested.marks.ratio = CM / CM_sum,
    marksinside50.ratio = MI5 / MI5_sum,
    one.percenters.ratio = One.Percenters / One.Percenters_sum,
    bounces.ratio = BO / BO_sum,
    goal.assists.ratio = GA / GA_sum,
    disposals.ratio = D / D_sum
  )

# Fix: the training data had its NA/NaN values set to 0 before fitting, but
# the scoring data did not. A team with a zero total for a statistic gives
# 0/0 = NaN for every one of its players; zero-fill the ratio columns so the
# scoring data is prepared the same way as the training data.
ratio_cols <- grep("\\.ratio$", names(complete_df_ratio_out), value = TRUE)
complete_df_ratio_out[ratio_cols] <- lapply(
  complete_df_ratio_out[ratio_cols],
  function(x) replace(x, is.na(x), 0)
)




# Reduce the scoring data to identifiers, margin and the ratio predictors.
conforming <- complete_df_ratio_out %>%
  dplyr::select(
    Player, Date, Season, Round.x, Team, margin,
    kick.ratio:disposals.ratio
  )

# Placeholder response column; it is the last column and is stripped again
# below to form the prediction input.
conforming$Brownlow.Votes <- 0
out.sample <- conforming

newdata <- out.sample[, -ncol(out.sample)]

# Per-row class probabilities from the stepwise-selected model, flattened
# into a data frame with one column per vote count (0 to 3).
pre.dict <- predict(fm2, newdata = newdata, type = "prob")
pre.dict.m <- data.frame(matrix(unlist(pre.dict), nrow = nrow(newdata)))
colnames(pre.dict.m) <- paste0("vote.", 0:3)

newdata.pred <- cbind.data.frame(newdata, pre.dict.m)


#### Step 1: Get expected value on Votes
# E[votes] = 1 * P(1 vote) + 2 * P(2 votes) + 3 * P(3 votes)
newdata.pred$expected.votes <- with(
  newdata.pred,
  vote.1 + 2 * vote.2 + 3 * vote.3
)

#### Join the match identifier back on (it was dropped when the predictors
#### were selected).

get_match_ID <- fitzRoy::player_stats

# Fix: the original joined on Date + Player only. Two players with the same
# name appearing on the same date (for different teams) would then each match
# both lookup rows, duplicating prediction rows and attaching the wrong
# Match_id. Team is added to the key, and the lookup is de-duplicated so one
# player row can never fan out into several.
xx <- get_match_ID %>%
  dplyr::select(Date, Player, Team, Match_id) %>%
  dplyr::distinct()
newdata.pred <- left_join(newdata.pred, xx, by = c("Date", "Player", "Team"))

# Keep matches played before 1 September 2017. The cutoff is written as an
# explicit Date rather than relying on a string being coerced during the
# comparison.
newdata.pred <- dplyr::filter(newdata.pred, Date < as.Date("2017-09-01"))


#### Step 2: Sum each vote probability within a match
sum1 <- setNames(
  aggregate(vote.1 ~ Match_id, data = newdata.pred, FUN = sum),
  c("Match_id", "sum.vote.1")
)
sum2 <- setNames(
  aggregate(vote.2 ~ Match_id, data = newdata.pred, FUN = sum),
  c("Match_id", "sum.vote.2")
)
sum3 <- setNames(
  aggregate(vote.3 ~ Match_id, data = newdata.pred, FUN = sum),
  c("Match_id", "sum.vote.3")
)

#### Step 3: Add sum of each vote by matchId to big table
newdata.pred <- Reduce(
  function(acc, totals) merge(acc, totals, by = "Match_id"),
  list(sum1, sum2, sum3),
  init = newdata.pred
)

#### Step 4: Add std1/2/3
# Each player's probability as a share of the match total, written as the
# reciprocal of (match total / player probability).
newdata.pred[["std.1"]] <- with(newdata.pred, (sum.vote.1 / vote.1)^-1)
newdata.pred[["std.2"]] <- with(newdata.pred, (sum.vote.2 / vote.2)^-1)
newdata.pred[["std.3"]] <- with(newdata.pred, (sum.vote.3 / vote.3)^-1)


#### Step 5: Expected standard game vote
# Weight the standardised shares by the number of votes they stand for.
newdata.pred[["exp_std_game_vote"]] <- with(
  newdata.pred,
  std.1 + 2 * std.2 + 3 * std.3
)


#### Step 6: List of winners
# Total the standardised expected votes per player/team label and show the
# ten highest totals.

newdata.pred[["PlayerName"]] <- with(newdata.pred, paste(Player, " ", Team))

winners.stdgame <- aggregate(
  exp_std_game_vote ~ PlayerName,
  data = newdata.pred,
  FUN = sum
)
# Sorting on the negated total puts the largest values first.
descending <- order(-winners.stdgame[["exp_std_game_vote"]])
winners.stdgame <- winners.stdgame[descending, ]
winners.stdgame[seq_len(10), ]

# proc.time() - ptm

To leave a comment for the author, please follow the link and comment on their blog: Analysis of AFL.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.