Comparing Many Models: Crude Oil Futures Price

[This article was first published on DataGeeek, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Crude oil futures prices have fluctuated above the point forecast line (XGBoost) year to date.

library(tidyverse)
library(tidymodels)
library(tidyquant)
library(timetk)
library(modeltime)
library(workflowsets)


#Crude Oil Futures(USD) (Index 2014 = 100)
df_crude_oil <- 
  tq_get("CL=F") %>% 
  tq_transmute(select = close,
               mutate_fun = to.monthly,
               col_rename = "crude_oil") %>% 
  mutate(date = as.Date(date))


#Federal Funds Effective Rate
df_fed_funds <- 
  tq_get("FEDFUNDS", get = "economic.data") %>% 
  select(date, fed_funds = price)

#Industrial Production: Mining: Crude Oil (NAICS = 21112) (Index 2014 = 100)
df_crude_oil_production <- 
  tq_get("IPG21112S", get = "economic.data") %>% 
  select(date, crude_oil_production = price) 

#Merging all the data sets
df_merged <- 
  df_crude_oil %>%
  left_join(df_fed_funds) %>% 
  left_join(df_crude_oil_production) %>% 
  drop_na()


#Splitting tha data
df_split <- 
  df_merged %>% 
  time_series_split(assess = "1 year",
                    cumulative = TRUE)

df_train <- training(df_split)
df_test <- testing(df_split)

#Bootstrapping for tuning
set.seed(12345)
df_folds <- bootstraps(df_train,
                       times = 100)


#Preprocessing
rec_all <- 
  recipe(crude_oil ~ ., data = df_train) %>% 
  step_mutate(date_num = as.numeric(date)) %>% 
  step_rm(date) %>% 
  step_normalize(all_numeric_predictors()) 
  

#Models

#Radial basis function support vector machines (SVMs) via kernlab
mod_svm_rbf <- 
  svm_rbf(cost = tune(),
          rbf_sigma = tune(), 
          margin = tune()) %>%  
  set_engine("kernlab") %>% 
  set_mode("regression")


#Multivariate adaptive regression splines (MARS) via earth
mod_mars <- 
  mars(num_terms = tune(), 
       prune_method = tune()) %>% 
  set_engine("earth", nfold = 10) %>% 
  set_mode("regression") 


#Boosted trees via xgboost
mod_boost_tree <- 
  boost_tree(mtry = tune(), 
             trees = tune(), 
             min_n = tune(), 
             learn_rate = tune()) %>%
  set_engine("xgboost") %>%
  set_mode("regression") 



#Workflow sets
wflow_all <- 
  workflow_set(
    preproc = list(svm_all = rec_all),
    models = list(SVM_rbf = mod_svm_rbf,
                  MARS = mod_mars,
                  XGBoost = mod_boost_tree)) %>% 
  #Making the workflow ID's a little more simple:
  mutate(wflow_id = str_remove(wflow_id, "svm_all_"))



#Tuning and evaluating all the models
grid_ctrl <-
  control_grid(
    save_pred = TRUE,
    parallel_over = "everything",
    save_workflow = TRUE
  )

grid_results <-
  wflow_all %>%
  workflow_map(
    seed = 98765,
    resamples = df_folds,
    grid = 10,
    control = grid_ctrl
  )


#Accuracy of the grid results
grid_results %>% 
  rank_results(select_best = TRUE, 
               rank_metric = "rsq") %>%
  select(Models = wflow_id, .metric, mean)


#Finalizing the model with the best parameters
best_param <- 
  grid_results %>%
  extract_workflow_set_result("XGBoost") %>% 
  select_best(metric = "rsq")


wflw_fit <- 
  grid_results %>% 
  extract_workflow("XGBoost") %>% 
  finalize_workflow(best_param) %>% 
  fit(df_train)

#Calibration data
df_cal <- 
  wflw_fit %>% 
  modeltime_calibrate(new_data = df_test)

#Predictive intervals for XGBoost
df_cal %>%
  modeltime_forecast(actual_data = df_merged %>% 
                                   filter(date >= last(date) - months(12)),
                     new_data = df_test) %>%
  plot_modeltime_forecast(.interactive = FALSE,
                          .legend_show = FALSE,
                          .line_size = 1,
                          .color_lab = "",
                          .title = "Confidence Intervals for Crude Oil Future Prices (WTI)") +
  labs(subtitle = "Using XGBoost") + 
  theme_minimal(base_family = "Bricolage Grotesque",
                base_size = 16)

To leave a comment for the author, please follow the link and comment on their blog: DataGeeek.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)