[This article was first published on DataGeeek, and kindly contributed to R-bloggers].
Since April, Nvidia has tried to hold above the point forecast line. With the slopes of the predictive intervals tilting slightly upward, could Nvidia continue its uptrend?
Source code:
library(tidyverse)
library(tidyquant) #provides tq_get() and tq_transmute()
library(timetk)
library(tidymodels)
library(modeltime)
library(ggthemes)
#Nvidia (monthly)
df_nvidia <-
  tq_get("NVDA", to = "2024-08-29") %>%
  tq_transmute(select = close,
               mutate_fun = to.monthly,
               col_rename = "nvidia") %>%
  mutate(date = as.Date(date))
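As a quick optional check, timetk's tk_summary_diagnostics() can confirm that the downloaded series is monthly and gap-free:

df_nvidia %>%
  tk_summary_diagnostics(.date_var = date)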
#FEDFUNDS
df_fedfunds <- read_csv("fedfunds.csv")
#Tidying the data
df_fedfunds_tidy <-
  df_fedfunds %>%
  janitor::clean_names() %>%
  select(date = release_date, fedfunds = actual) %>%
  #Converts date strings (two possible formats) to date objects
  mutate(date = case_when(
    !is.na(parse_date(date, format = "%b %d, %Y")) ~ parse_date(date, format = "%b %d, %Y"),
    !is.na(parse_date(date, format = "%d-%b-%y")) ~ parse_date(date, format = "%d-%b-%y")
  )) %>%
  #Removes the first three blank rows
  slice_tail(n = -3) %>%
  mutate(date = floor_date(date, "month") %m+% months(1),
         fedfunds = str_remove(fedfunds, "%") %>% as.numeric()) %>%
  #Makes a regular time series by filling the time gaps
  pad_by_time(date, .by = "month") %>%
  fill(fedfunds, .direction = "up") %>%
  distinct(date, .keep_all = TRUE)
#Merges all the data sets
df_merged <-
  df_nvidia %>%
  left_join(df_fedfunds_tidy, by = "date")
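An optional check that the join left no missing fed funds values, since any NAs here would flow straight into the model matrix:

df_merged %>%
  summarise(across(everything(), ~ sum(is.na(.x))))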
#Modeling
#Splitting the data
df_split <-
  df_merged %>%
  time_series_split(assess = "1 year",
                    cumulative = TRUE)

df_train <- training(df_split)
df_test <- testing(df_split)
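To verify the split boundaries, the date ranges of the two sets can be inspected; the test set should cover the final 12 months:

range(df_train$date)
range(df_test$date)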
#Bootstrapping for tuning
set.seed(12345)
df_folds <- bootstraps(df_train, times = 100)
#Preprocessing
#Preprocessing for Boosted ARIMA
rec_arima_boost <-
  recipe(nvidia ~ ., data = df_train) %>%
  step_date(date, features = c("year", "month")) %>%
  step_dummy(date_month, one_hot = TRUE) %>%
  step_normalize(all_numeric_predictors())
#Preprocessing for XGBoost and MARS
rec_mars_xgboost <-
  rec_arima_boost %>%
  step_rm(date)
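To see what the preprocessing produces (a year feature, one-hot month dummies, and normalized predictors), the recipe can be prepped and baked on the training data; a minimal sketch:

rec_arima_boost %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()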
#Models
#Boosted ARIMA Regression
#(https://business-science.github.io/modeltime/reference/arima_boost.html)
mod_arima_boost <-
  arima_boost(
    min_n = tune(),
    learn_rate = tune(),
    trees = tune()
  ) %>%
  set_engine(engine = "auto_arima_xgboost")
#Multivariate adaptive regression splines (MARS) via earth
#(https://parsnip.tidymodels.org/reference/details_mars_earth.html)
mod_mars <-
  mars(num_terms = tune(),
       prune_method = tune()) %>%
  set_engine("earth", nfold = 10) %>%
  set_mode("regression")
#Boosted trees via xgboost
#(https://parsnip.tidymodels.org/reference/details_boost_tree_xgboost.html)
mod_boost_tree <-
  boost_tree(mtry = tune(),
             trees = tune(),
             min_n = tune(),
             learn_rate = tune()) %>%
  set_engine("xgboost") %>%
  set_mode("regression")
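Each spec above marks its hyperparameters with tune(); as a quick sketch, the tunable parameter set of a spec can be listed before tuning, e.g. for the XGBoost model:

mod_boost_tree %>%
  extract_parameter_set_dials()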
#Workflow sets
wflow_arima_boost <-
  workflow_set(
    preproc = list(rec_arima_boost = rec_arima_boost),
    models = list(ARIMA_boost = mod_arima_boost)
  )

wflow_mars_xgboost <-
  workflow_set(
    preproc = list(rec_mars_xgboost = rec_mars_xgboost),
    models = list(MARS = mod_mars,
                  XGBoost = mod_boost_tree)
  )
#Combining all the workflows
wflow_all <-
  bind_rows(
    wflow_arima_boost,
    wflow_mars_xgboost
  ) %>%
  #Simplifies the workflow IDs
  mutate(wflow_id = str_remove(wflow_id, "(rec_arima_boost_)|(rec_mars_xgboost_)"))
#Tuning and evaluating all the models
grid_ctrl <-
  control_grid(
    save_pred = TRUE,
    parallel_over = "everything",
    save_workflow = TRUE
  )
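Note that parallel_over = "everything" only takes effect when a parallel backend is registered; an optional sketch with doParallel (assuming it is installed):

library(doParallel)
cl <- makePSOCKcluster(parallel::detectCores() - 1)
registerDoParallel(cl)
#Run the tuning below, then release the workers with stopCluster(cl)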
grid_results <-
  wflow_all %>%
  workflow_map(
    seed = 98765,
    resamples = df_folds,
    grid = 10,
    control = grid_ctrl
  )
#Accuracy of the grid results
grid_results %>%
  rank_results(select_best = TRUE,
               rank_metric = "rsq") %>%
  select(Models = wflow_id, .metric, mean)
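The ranked results can also be compared visually with workflowsets' autoplot() method:

grid_results %>%
  autoplot(rank_metric = "rsq",
           select_best = TRUE)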
#Finalizing the model with the best parameters
best_param <-
  grid_results %>%
  extract_workflow_set_result("ARIMA_boost") %>%
  select_best(metric = "rsq")

wflw_fit <-
  grid_results %>%
  extract_workflow("ARIMA_boost") %>%
  finalize_workflow(best_param) %>%
  fit(df_train)
#Calibration data
df_cal <-
  wflw_fit %>%
  modeltime_calibrate(new_data = df_test)
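The calibrated table also yields out-of-sample accuracy metrics for the held-out year:

df_cal %>%
  modeltime_accuracy()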
#Predictive intervals for Boosted ARIMA
df_cal %>%
  modeltime_forecast(actual_data = df_merged %>%
                       filter(date >= last(date) - months(12)),
                     new_data = df_test) %>%
  plot_modeltime_forecast(.interactive = FALSE,
                          .legend_show = FALSE,
                          .line_size = 1,
                          .color_lab = "",
                          .title = "Predictive Intervals for Nvidia") +
  labs(subtitle = "Monthly Stock Prices<br><span style = 'color:red;'>Boosted ARIMA Point Forecasting Line</span>") +
  scale_x_date(breaks = c(make_date(2023, 8, 1),
                          make_date(2024, 4, 1),
                          make_date(2024, 8, 1)),
               labels = scales::label_date(format = "%Y %b"),
               expand = expansion(mult = c(.1, .1))) +
  theme_wsj(base_family = "Bricolage Grotesque",
            color = "grey",
            base_size = 12) +
  theme(legend.position = "none",
        plot.subtitle = ggtext::element_markdown(size = 17, face = "bold"))
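To forecast beyond the test set, the model would be refit on the full data and given future values of the exogenous regressor; a minimal sketch that, purely for illustration, assumes the fed funds rate stays flat at its last observed value:

#Builds a 6-month future frame; the flat-rate fill is an illustrative assumption
df_future <-
  df_merged %>%
  future_frame(.date_var = date, .length_out = "6 months") %>%
  mutate(fedfunds = last(df_merged$fedfunds))

df_cal %>%
  modeltime_refit(data = df_merged) %>%
  modeltime_forecast(new_data = df_future,
                     actual_data = df_merged) %>%
  plot_modeltime_forecast(.interactive = FALSE)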
