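Sometimes, you will find it necessary to work with several “shapviz” objects at the same time, for example to visualize a multiclass or multioutput model, to compare SHAP plots across subgroups, or to compare different models.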
To simplify the workflow, {shapviz} introduces the “mshapviz” object (“m” like “multi”). You can create it in different ways:
- `shapviz()` on multiclass XGBoost or LightGBM models.
- `shapviz()` on “kernelshap” objects created from multiclass/multioutput models.
- `c(Mod_1 = s1, Mod_2 = s2, ...)` on “shapviz” objects `s1`, `s2`, …
- `mshapviz(list(Mod_1 = s1, Mod_2 = s2, ...))`
The `sv_*()` functions use the {patchwork} package to glue the individual plots together.
library(xgboost)
library(ggplot2)
library(shapviz)
library(patchwork)
# Fit a three-class XGBoost model on iris; Species (column 5) is the response.
params <- list(objective = "multi:softprob", num_class = 3)

# Numeric feature matrix: every column except Species
X_pred <- data.matrix(iris[, -5])

# as.integer() on the factor yields codes 1..3; subtract 1 to get labels 0..2
dtrain <- xgb.DMatrix(X_pred, label = as.integer(iris[, 5]) - 1)
fit <- xgb.train(params = params, data = dtrain, nrounds = 50)
Note that TreeSHAP produces SHAP values on link scale, i.e., on logit scale.
# For the multiclass model, shapviz() returns an "mshapviz" object that
# holds one "shapviz" object per class (see the printed output below).
x <- shapviz(fit, X_pred = X_pred, X = iris)
x
#> 'mshapviz' object representing 3 'shapviz' objects:
#> 'Class_1': 150 x 4 SHAP matrix
#> 'Class_2': 150 x 4 SHAP matrix
#> 'Class_3': 150 x 4 SHAP matrix

# Contains "shapviz" objects for all classes
all.equal(x[[3]], shapviz(fit, X_pred = X_pred, X = iris, which_class = 3))
#> [1] TRUE

# Better names
names(x) <- levels(iris$Species)
x
#> 'mshapviz' object representing 3 'shapviz' objects:
#> 'setosa': 150 x 4 SHAP matrix
#> 'versicolor': 150 x 4 SHAP matrix
#> 'virginica': 150 x 4 SHAP matrix
# Force plots for observation 101, one panel per element of `x`
sv_force(x, row_id = 101)

# Importance plots of kind "bee", arranged in a single column by patchwork
sv_importance(x, kind = "bee") + plot_layout(ncol = 1)
Here, we will use identical coordinate systems.
# Name the elements of `x` after the species levels.
names(x) <- levels(iris$Species)

# Dependence plots on Petal.Length, stacked in one column. The `&`
# operator applies the same axis limits to every panel.
(
  sv_dependence(x, v = "Petal.Length") +
    plot_layout(ncol = 1)
) &
  xlim(1, 7) &
  ylim(-3, 4)
library(lightgbm)
# Model: three-class LightGBM classifier on iris (Species is column 5)
params <- list(objective = "multiclass", num_class = 3)
X_pred <- data.matrix(iris[, -5])

# as.integer() on the factor yields codes 1..3; subtract 1 to get labels 0..2
dtrain <- lgb.Dataset(X_pred, label = as.integer(iris[, 5]) - 1)
fit <- lgb.train(params = params, data = dtrain, nrounds = 50)

# SHAP values of the fitted multiclass model
x <- shapviz(fit, X_pred = X_pred, X = iris)

# Importance plots with printed values; `&` gives all panels one x range
sv_importance(x, show_numbers = TRUE) +
  plot_layout(ncol = 1) &
  xlim(0, 2.25)
Since Kernel SHAP is model agnostic, we can get SHAP values on probability scale.
library(ranger)
library(kernelshap)
# Model: probability random forest, so predictions are class probabilities
fit <- ranger(Species ~ ., data = iris, num.trees = 100, probability = TRUE)

# "mshapviz" object
# Kernel SHAP on all features (Species dropped), with iris as background data
x <- kernelshap(fit, X = iris[-5], bg_X = iris)

# Convert to "mshapviz" and name its elements after the species levels
shp <- setNames(shapviz(x), levels(iris$Species))
# all.equal(shp[[3]], shapviz(x, which_class = 3))

# Dependence plots on Sepal.Width with a common y range across panels
sv_dependence(shp, v = "Sepal.Width") +
  plot_layout(ncol = 2) &
  ylim(-0.025, 0.03)
Here, we want to compare SHAP dependence plots across Species subgroups.
# Regression model: Sepal.Length (column 1) is the response, and the
# remaining columns (including Species, converted by data.matrix())
# are the features.
X_pred <- data.matrix(iris[, -1])
dtrain <- xgb.DMatrix(X_pred, label = iris[, 1])
fit_xgb <- xgb.train(data = dtrain, nrounds = 50)

# One "shapviz" object for the whole data set ...
shap_xgb <- shapviz(fit_xgb, X_pred = X_pred, X = iris)

# ... split by Species to compare the subgroups
x_subgroups <- split(shap_xgb, f = iris$Species)

# Dependence plots per subgroup on identical axes
sv_dependence(x_subgroups, v = "Petal.Length") +
  plot_layout(ncol = 1) &
  xlim(1, 7) &
  ylim(-1.4, 2.2)
In the last example, we used a regression model fitted via XGBoost. How does it compare with a linear regression?
library(kernelshap)
# Linear regression counterpart of the XGBoost regression model
fit_lm <- lm(Sepal.Length ~ ., data = iris)

# Kernel SHAP on the features (response column dropped), iris as background
shap_lm <- shapviz(kernelshap(fit_lm, iris[-1], bg_X = iris))

# Combine both "shapviz" objects into one "mshapviz" object.
# `shap_xgb` is the XGBoost "shapviz" object from the previous example.
mshap <- c(lm = shap_lm, xgb = shap_xgb)
mshap
#> 'mshapviz' object representing 2 'shapviz' objects:
#> 'lm': 150 x 4 SHAP matrix
#> 'xgb': 150 x 4 SHAP matrix

# Compare the two models
sv_importance(mshap)
sv_dependence(mshap, v = "Species") &
  ylim(-0.5, 0.6)