[This article was first published on DataGeeek, and kindly contributed to R-bloggers].
Since April, Nvidia has tried to hold above the point forecast line. With the slopes of the predictive intervals tilting slightly upward, could Nvidia continue its uptrend?
Source code:
library(tidyverse)
library(tidyquant) #provides tq_get() and tq_transmute()
library(timetk)
library(tidymodels)
library(modeltime)
library(ggthemes)
#Nvidia (monthly)
df_nvidia <-
  tq_get("NVDA", to = "2024-08-29") %>%
  tq_transmute(select = close,
               mutate_fun = to.monthly,
               col_rename = "nvidia") %>%
  mutate(date = as.Date(date))
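As a quick optional check, timetk's tk_summary_diagnostics() can confirm that the downloaded series is monthly and gap-free:

df_nvidia %>%
  tk_summary_diagnostics(.date_var = date)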
#FEDFUNDS
df_fedfunds <- read_csv("fedfunds.csv")
#Tidying the data
df_fedfunds_tidy <-
  df_fedfunds %>%
  janitor::clean_names() %>%
  select(date = release_date, fedfunds = actual) %>%
  #Converts date strings (two possible formats) to date objects
  mutate(date = case_when(
    !is.na(parse_date(date, format = "%b %d, %Y")) ~ parse_date(date, format = "%b %d, %Y"),
    !is.na(parse_date(date, format = "%d-%b-%y")) ~ parse_date(date, format = "%d-%b-%y")
  )) %>%
  #Removes the first three blank rows
  slice_tail(n = -3) %>%
  mutate(date = floor_date(date, "month") %m+% months(1),
         fedfunds = str_remove(fedfunds, "%") %>% as.numeric()) %>%
  #Makes a regular time series by filling the time gaps
  pad_by_time(date, .by = "month") %>%
  fill(fedfunds, .direction = "up") %>%
  distinct(date, .keep_all = TRUE)
#Merges all the data sets
df_merged <-
  df_nvidia %>%
  left_join(df_fedfunds_tidy, by = "date")
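An optional check that the join left no missing fed funds values, since any NAs here would flow straight into the model matrix:

df_merged %>%
  summarise(across(everything(), ~ sum(is.na(.x))))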
#Modeling
#Splitting the data
df_split <-
  df_merged %>%
  time_series_split(assess = "1 year",
                    cumulative = TRUE)

df_train <- training(df_split)
df_test <- testing(df_split)
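To verify the split boundaries, the date ranges of the two sets can be inspected; the test set should cover the final 12 months:

range(df_train$date)
range(df_test$date)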
#Bootstrapping for tuning
set.seed(12345)
df_folds <- bootstraps(df_train, times = 100)
#Preprocessing
#Preprocessing for Boosted ARIMA
rec_arima_boost <-
  recipe(nvidia ~ ., data = df_train) %>%
  step_date(date, features = c("year", "month")) %>%
  step_dummy(date_month, one_hot = TRUE) %>%
  step_normalize(all_numeric_predictors())
#Preprocessing for XGBoost and MARS
rec_mars_xgboost <-
  rec_arima_boost %>%
  step_rm(date)
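To see what the preprocessing produces (a year feature, one-hot month dummies, and normalized predictors), the recipe can be prepped and baked on the training data; a minimal sketch:

rec_arima_boost %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()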
#Models
#Boosted ARIMA Regression
#(https://business-science.github.io/modeltime/reference/arima_boost.html)
mod_arima_boost <-
  arima_boost(
    min_n = tune(),
    learn_rate = tune(),
    trees = tune()
  ) %>%
  set_engine(engine = "auto_arima_xgboost")
#Multivariate adaptive regression splines (MARS) via earth
#(https://parsnip.tidymodels.org/reference/details_mars_earth.html)
mod_mars <-
  mars(num_terms = tune(),
       prune_method = tune()) %>%
  set_engine("earth", nfold = 10) %>%
  set_mode("regression")
#Boosted trees via xgboost
#(https://parsnip.tidymodels.org/reference/details_boost_tree_xgboost.html)
mod_boost_tree <-
  boost_tree(mtry = tune(),
             trees = tune(),
             min_n = tune(),
             learn_rate = tune()) %>%
  set_engine("xgboost") %>%
  set_mode("regression")
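Each spec above marks its hyperparameters with tune(); as a quick sketch, the tunable parameter set of a spec can be listed before tuning, e.g. for the XGBoost model:

mod_boost_tree %>%
  extract_parameter_set_dials()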
#Workflow sets
wflow_arima_boost <-
  workflow_set(
    preproc = list(rec_arima_boost = rec_arima_boost),
    models = list(ARIMA_boost = mod_arima_boost)
  )

wflow_mars_xgboost <-
  workflow_set(
    preproc = list(rec_mars_xgboost = rec_mars_xgboost),
    models = list(MARS = mod_mars,
                  XGBoost = mod_boost_tree)
  )
#Combining all the workflows
wflow_all <-
  bind_rows(
    wflow_arima_boost,
    wflow_mars_xgboost
  ) %>%
  #Simplifies the workflow IDs
  mutate(wflow_id = str_remove(wflow_id, "(rec_arima_boost_)|(rec_mars_xgboost_)"))
#Tuning and evaluating all the models
grid_ctrl <-
  control_grid(
    save_pred = TRUE,
    parallel_over = "everything",
    save_workflow = TRUE
  )
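Note that parallel_over = "everything" only takes effect when a parallel backend is registered; an optional sketch with doParallel (assuming it is installed):

library(doParallel)
cl <- makePSOCKcluster(parallel::detectCores() - 1)
registerDoParallel(cl)
#Run the tuning below, then release the workers with stopCluster(cl)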
grid_results <-
  wflow_all %>%
  workflow_map(
    seed = 98765,
    resamples = df_folds,
    grid = 10,
    control = grid_ctrl
  )
#Accuracy of the grid results
grid_results %>%
  rank_results(select_best = TRUE,
               rank_metric = "rsq") %>%
  select(Models = wflow_id, .metric, mean)
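The ranked results can also be compared visually with workflowsets' autoplot() method:

grid_results %>%
  autoplot(rank_metric = "rsq",
           select_best = TRUE)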
#Finalizing the model with the best parameters
best_param <-
  grid_results %>%
  extract_workflow_set_result("ARIMA_boost") %>%
  select_best(metric = "rsq")

wflw_fit <-
  grid_results %>%
  extract_workflow("ARIMA_boost") %>%
  finalize_workflow(best_param) %>%
  fit(df_train)
#Calibration data
df_cal <-
  wflw_fit %>%
  modeltime_calibrate(new_data = df_test)
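The calibrated table also yields out-of-sample accuracy metrics for the held-out year:

df_cal %>%
  modeltime_accuracy()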
#Predictive intervals for Boosted ARIMA
df_cal %>%
  modeltime_forecast(actual_data = df_merged %>%
                       filter(date >= last(date) - months(12)),
                     new_data = df_test) %>%
  plot_modeltime_forecast(.interactive = FALSE,
                          .legend_show = FALSE,
                          .line_size = 1,
                          .color_lab = "",
                          .title = "Predictive Intervals for Nvidia") +
  labs(subtitle = "Monthly Stock Prices<br><span style = 'color:red;'>Boosted ARIMA Point Forecasting Line</span>") +
  scale_x_date(breaks = c(make_date(2023, 8, 1),
                          make_date(2024, 4, 1),
                          make_date(2024, 8, 1)),
               labels = scales::label_date(format = "%Y %b"),
               expand = expansion(mult = c(.1, .1))) +
  theme_wsj(base_family = "Bricolage Grotesque",
            color = "grey",
            base_size = 12) +
  theme(legend.position = "none",
        plot.subtitle = ggtext::element_markdown(size = 17, face = "bold"))
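To forecast beyond the test set, the model would be refit on the full data and given future values of the exogenous regressor; a minimal sketch that, purely for illustration, assumes the fed funds rate stays flat at its last observed value:

#Builds a 6-month future frame; the flat-rate fill is an illustrative assumption
df_future <-
  df_merged %>%
  future_frame(.date_var = date, .length_out = "6 months") %>%
  mutate(fedfunds = last(df_merged$fedfunds))

df_cal %>%
  modeltime_refit(data = df_merged) %>%
  modeltime_forecast(new_data = df_future,
                     actual_data = df_merged) %>%
  plot_modeltime_forecast(.interactive = FALSE)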
