[This article was first published on T. Moudiki's Webpage - R, and kindly contributed to R-bloggers].
A few weeks ago, in #148, I talked about the next algorithm I'll include in learningmachine: a Bayesian one, described in this document, that learns in the way that's most intuitive to us, online (one observation at a time) instead of in batch.
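In case you don't want to open the linked document right away, the idea behind the online updates can be sketched quickly. What follows is the generic conjugate Gaussian update for a Bayesian linear model on the hidden features, not necessarily the exact formulation used in bayesianrvfl: if $m_t$ and $S_t$ are the posterior mean and covariance of the coefficients after $t$ observations, $\phi_{t+1}$ the feature vector of the new point and $\sigma^2$ the noise variance, then

$$
S_{t+1}^{-1} = S_t^{-1} + \frac{1}{\sigma^2}\,\phi_{t+1}\phi_{t+1}^{\top},
\qquad
m_{t+1} = S_{t+1}\left(S_t^{-1} m_t + \frac{1}{\sigma^2}\,\phi_{t+1}\,y_{t+1}\right),
$$

so each new observation refines the previous posterior directly, without refitting on the whole batch.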
Here is how it works (not in learningmachine yet, but you can already try it):
utils::install.packages("remotes") remotes::install_github("thierrymoudiki/bayesianrvfl") library("bayesianrvfl") # 0 - dataset ----- X <- as.matrix(mtcars[,-1]) y <- mtcars$mpg (n <- dim(X)[1]); (p <- dim(X)[2]) # 1 - Split data set into 3 parts ----- set.seed(1) (idx_train <- caret::createDataPartition(y, p = 0.6)$Resample1) (idx_comp <- generics::setdiff(1:n, idx_train)) (idx_validation <- base::sample(x = idx_comp, size = floor(length(idx_comp)/2), replace = FALSE)) (idx_test <- generics::setdiff(idx_comp, idx_validation)) # check print(generics::intersect(idx_train, idx_validation)) print(generics::intersect(c(idx_train, idx_validation), idx_test)) X_train <- X[idx_train,] y_train <- y[idx_train] X_validation <- X[idx_validation,] y_validation <- y[idx_validation] X_test <- X[idx_test,] y_test <- y[idx_test] # 2 - Fit the model ----- # choose the 'best' regularization parameter # (there are many other ways, and there are also # other parameters, type 'head(bayesianrvfl::fit_rvfl)') obj_GCV <- bayesianrvfl::fit_rvfl(x = X_train, y = y_train) (best_lambda <- obj_GCV$lambda[which.min(obj_GCV$GCV)]) (fit_obj <- bayesianrvfl::fit_rvfl(x = X_train, y = y_train, method = "solve", lambda = best_lambda, compute_Sigma = TRUE)) # 3 - Predict on validation set ----- preds_validation <- bayesianrvfl::predict_rvfl(fit_obj, newx = X_validation) level <- 95 multiplier <- qnorm(1 - (100 - level)/200) summary(preds_validation$mean - y_validation) preds_upper <- preds_validation$mean + multiplier*preds_validation$sd preds_lower <- preds_validation$mean - multiplier*preds_validation$sd # coverage rate mean((preds_upper >= y_validation)*(preds_lower <= y_validation)) # 4 - Update ----- # add new points in a online fit_obj2 <- bayesianrvfl::update_params(fit_obj, newx = X[idx_test[1], ], newy = y[idx_test[1]]) fit_obj3 <- bayesianrvfl::update_params(fit_obj2, newx = X[idx_test[2], ], newy = y[idx_test[2]]) fit_obj4 <- bayesianrvfl::update_params(fit_obj3, newx = X[idx_test[3], ], newy = y[idx_test[3]]) fit_obj5 <- bayesianrvfl::update_params(fit_obj4, newx = X[idx_test[4], ], newy = y[idx_test[4]]) fit_obj6 <- bayesianrvfl::update_params(fit_obj5, newx = X[idx_test[5], ], newy = y[idx_test[5]]) fit_obj7 <- bayesianrvfl::update_params(fit_obj6, newx = X[idx_test[6], ], newy = y[idx_test[6]]) mat_coefs <- cbind(fit_obj$coef, fit_obj2$coef, fit_obj3$coef, fit_obj4$coef, fit_obj5$coef, fit_obj6$coef, fit_obj7$coef) preds_validation2 <- bayesianrvfl::predict_rvfl(fit_obj2, newx = X_validation) preds_validation3 <- bayesianrvfl::predict_rvfl(fit_obj7, newx = X_validation) # 5 - Plots ----- par(mfrow=c(3, 2)) plot(x = log(obj_GCV$lambda), y = obj_GCV$GCV, type='l', main = 'Generalized Cross-validation error', xlab = "log(lambda)", ylab = "GCV") plot(y_validation, type='l', col="red", lwd=2, ylim = c(min(c(y_validation, preds_lower, preds_validation$mean)), max(c(y_validation, preds_upper, preds_validation$mean))), main = 'Out-of-sample credible intervals', xlab = "obs#", ylab = "prediction") lines(preds_validation$mean, col="blue", lwd=2) lines(preds_upper, col="gray") lines(preds_lower, col="gray") plot(x = y_validation, y = preds_validation$mean, ylim = c(min(c(y_validation, preds_lower, preds_validation$mean)), max(c(y_validation, preds_upper, preds_validation$mean))), main = 'observed vs predicted \n (before updates)', xlab = "observed", ylab = "predicted") abline(a = 0, b = 1, col="green", lwd=2) matplot(t(mat_coefs), type = 'l', lwd = 2, main = 'model coefficients \n after each update', xlab = "update#", 
ylab = "model coefficient") plot(x = y_validation, y = preds_validation2$mean, ylim = c(min(c(y_validation, preds_lower, preds_validation$mean)), max(c(y_validation, preds_upper, preds_validation$mean))), main = 'observed vs predicted \n (after 6 point-updates)', xlab = "observed", ylab = "predicted") abline(a = 0, b = 1, col="green", lwd=2) matplot(t(preds_validation3$simulate(250)), type='l', lwd = 2, main = 'predictive posterior simulation \n (after 6 point-updates)', xlab = "obs#", ylab = "prediction")