One interface, (Almost) Every Classifier: unifiedml v0.2.1
[This article was first published on T. Moudiki's Webpage - R, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
A new version of unifiedml is out; available on CRAN. unifiedml is an effort to offer a unified interface to R’s machine learning models.
The main change in this version 0.2.1 is the removal of type (of prediction) from predict, and the use of... instead, which is more generic and flexible.
This post contains advanced examples of use of unifiedml for classification, with ranger and xgboost. More examples have been added to the package vignettes too.
install.packages("unifiedml")
install.packages(c("ranger"))
library("unifiedml")
Loading required package: doParallel
Loading required package: foreach
Loading required package: iterators
Loading required package: parallel
Loading required package: R6
1 – ranger example
library(ranger)
# 2 - 'ranger' classification ---------------------------
# -------------------------------
# S3 wrapper for ranger
# -------------------------------
# Fit function remains the same
my_ranger <- function(x, y, ...) {
if (!is.data.frame(x)) x <- as.data.frame(x)
y <- as.factor(y)
colnames(x) <- paste0("X", seq_len(ncol(x)))
df <- data.frame(y = y, x)
fit <- ranger::ranger(y ~ ., data = df, probability = TRUE, ...)
structure(list(fit = fit), class = "my_ranger")
}
# Predict only with newdata
predict.my_ranger <- function(object, newdata = NULL, newx = NULL, ...) {
if (!is.null(newx)) newdata <- newx
if (is.null(newdata)) stop("No data provided for prediction")
# misc::debug_print(newx)
# misc::debug_print(newdata)
if (is.matrix(newdata)) newdata <- as.data.frame(newdata)
# misc::debug_print(newdata)
# Unconditionally rename to match training
colnames(newdata) <- paste0("X", seq_len(ncol(newdata)))
# misc::debug_print(newdata)
preds <- predict(object$fit, data = newdata)$predictions
# misc::debug_print(newdata)
if (is.matrix(preds) && ncol(preds) == 2) {
lvls <- colnames(preds)
return(ifelse(preds[, 2] > 0.5, lvls[2], lvls[1]))
}
preds
}
# Print method
print.my_ranger <- function(x, ...) {
cat("my_ranger model\n")
print(x$fit)
}
# -------------------------------
# Example: Iris binary classification
# -------------------------------
set.seed(123)
iris_binary <- iris[iris$Species %in% c("setosa", "versicolor"), ]
X_binary <- iris_binary[, 1:4]
y_binary <- as.factor(as.character(iris_binary$Species))
# Train/test split
train_idx <- sample(seq_len(nrow(X_binary)), size = 0.7 * nrow(X_binary))
X_train <- X_binary[train_idx, ]
y_train <- y_binary[train_idx]
X_test <- X_binary[-train_idx, ]
y_test <- y_binary[-train_idx]
# Initialize and fit model
# Initialize model
mod <- Model$new(my_ranger)
# Fit on training data only
mod$fit(X_train, y_train, num.trees = 150L)
# Predict on test set
preds <- mod$predict(X_test)
# Evaluate
table(Predicted = preds, True =y_test)
mean(preds == y_test) # Accuracy
# 5-fold cross-validation on training set
cv_scores <- cross_val_score(
mod,
X_train,
y_train,
num.trees = 150L,
cv = 5L
)
cv_scores
mean(cv_scores) # average CV accuracy
True
Predicted setosa versicolor
setosa 15 0
versicolor 0 15
1
|======================================================================| 100%
- 1
- 1
- 1
- 1
- 1
1
2 - xgboost example
library(xgboost)
my_xgboost <- function(x, y, ...) {
# Convert to matrix safely
if (!is.matrix(x)) {
x <- as.matrix(x)
}
# Handle factors
if (is.factor(y)) {
y <- as.numeric(y) - 1
}
fit <- xgboost::xgboost(
data = x,
label = y,
...
)
structure(list(fit = fit), class = "my_xgboost")
}
predict.my_xgboost <- function(object, newdata, ...) {
# Ensure matrix
newdata <- as.matrix(newdata)
preds <- predict(object$fit, newdata)
# If binary classification → convert probs to class
if (!is.null(object$fit$params$objective) &&
grepl("binary", object$fit$params$objective)) {
return(ifelse(preds > 0.5, 1, 0))
}
preds
}
predict.my_xgboost <- function(object, newdata = NULL, newx = NULL, ...) {
# Accept both conventions
if (!is.null(newx)) {
newdata <- newx
}
newdata <- as.matrix(newdata)
preds <- predict(object$fit, newdata)
# Binary classification → class labels
if (!is.null(object$fit$params$objective) &&
grepl("binary", object$fit$params$objective)) {
return(ifelse(preds > 0.5, 1, 0))
}
preds
}
print.my_xgboost <- function(x, ...) {
cat("my_xgboost model\n")
print(x$fit)
}
set.seed(123) # for reproducibility
# Binary subset
iris_binary <- iris[iris$Species %in% c("setosa", "versicolor"), ]
X_binary <- as.matrix(iris_binary[, 1:4])
y_binary <- as.factor(as.character(iris_binary$Species))
# Split indices: 70% train, 30% test
train_idx <- sample(seq_len(nrow(X_binary)), size = 0.7 * nrow(X_binary))
X_train <- X_binary[train_idx, ]
y_train <- y_binary[train_idx]
X_test <- X_binary[-train_idx, ]
y_test <- y_binary[-train_idx]
# Initialize model
mod <- Model$new(my_xgboost)
# Fit on training data only
mod$fit(X_train, y_train, nrounds = 50, objective = "binary:logistic")
# Predict on test set
preds <- mod$predict(X_test)
# Evaluate
table(Predicted = preds, True =y_test)
mean(preds == y_test) # Accuracy
# 5-fold cross-validation on training set
cv_scores <- cross_val_score(
mod,
X_train,
y_train,
nrounds = 50,
objective = "binary:logistic",
cv = 5L
)
cv_scores
mean(cv_scores) # average CV accuracy

To leave a comment for the author, please follow the link and comment on their blog: T. Moudiki's Webpage - R.
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.