#### Supplemental R-Code for
####
#### (Psycho-)Analysis of Benchmark Experiments: A Formal Framework
#### for Investigating the Relationship between Data Sets and
#### Learning Algorithms
####
#### by Manuel J. A. Eugster, Friedrich Leisch and Carolin Strobl.

library(benchmark)

# Colours used for the scatter plots below; the plots index this palette
# with `as.integer(classes) + 1`, i.e. entries 2 and 3.
palette(c("black", "#F8766D", "#00BFC4"))


### Data generating process:

# Generate two two-class data sets that share the same predictors.
#
# `x1` is drawn uniformly from the four adjacent unit intervals
# [-2, -1], [-1, 0], [0, 1] and [1, 2] (n / 4 points each, in that order);
# `x2` is drawn uniformly from [-2, 2].
#
# Args:
#   n: Total number of observations; must be a non-negative multiple of 4
#      so that the four x1 segments have equal, integer size.
#
# Returns:
#   A list with two data frames (columns `x1`, `x2`, `classes`):
#     linsep  - class 0 for the first two x1 segments, class 1 for the
#               last two.
#     nlinsep - classes 0, 1, 1, 0 for the four x1 segments in order.
linsep.dgp <- function(n) {
  # Without this check a non-multiple of 4 yields x1 and x2 of different
  # lengths and an opaque error from data.frame().
  stopifnot(
    is.numeric(n),
    length(n) == 1L,
    !is.na(n),
    n >= 0,
    n %% 4 == 0
  )

  n1 <- n / 4

  x1 <- c(runif(n1, min = -2, max = -1),
          runif(n1, min = -1, max = 0),
          runif(n1, min = 0, max = 1),
          runif(n1, min = 1, max = 2))
  x2 <- runif(n, min = -2, max = 2)

  c1 <- as.factor(rep(c(0, 1), c(n1 + n1, n1 + n1)))
  c2 <- as.factor(rep(c(0, 1, 1, 0), c(n1, n1, n1, n1)))

  list(linsep = data.frame(x1 = x1, x2 = x2, classes = c1),
       nlinsep = data.frame(x1 = x1, x2 = x2, classes = c2))
}


### Data sets and their characterizations:

set.seed(1234)
ds <- linsep.dgp(400)

plot(x2 ~ x1, data = ds$linsep,
     col = as.integer(ds$linsep$classes) + 1,
     xlim = c(-2.5, 2.5), ylim = c(-2.5, 2.5),
     pch = 19, cex = 0.7)

plot(x2 ~ x1, data = ds$nlinsep,
     col = as.integer(ds$nlinsep$classes) + 1,
     xlim = c(-2.5, 2.5), ylim = c(-2.5, 2.5),
     pch = 19, cex = 0.7)

ds1 <- as.dataset(classes ~ ., ds$linsep)
ds2 <- as.dataset(classes ~ ., ds$nlinsep)

ds.ch <- c(characterize(ds1, statlog()),
           characterize(ds2, statlog()))

plot(ds.ch)
plot(ds.ch, facet = TRUE)


### Candidate algorithms:

library(MASS)
library(e1071)

# Wrappers around the MASS predict methods that return only the `class`
# component of the prediction.
# NOTE(review): presumably defined at top level so that predict() calls
# made during benchmark() yield class labels directly -- confirm against
# the benchmark package.
predict.lda <- function(object, newdata, ...) {
  MASS:::predict.lda(object, newdata, ...)$class
}

predict.qda <- function(object, newdata, ...) {
  MASS:::predict.qda(object, newdata, ...)$class
}

# Performance measure: one minus the `diag` component that e1071's
# classAgreement() reports for the table of predicted (`yhat`) versus
# observed (`y`) classes.
miscl <- function(yhat, y) {
  1 - classAgreement(table(yhat, y))$diag
}


### Benchmark:

ds1.bec <- benchmark(ds1, c(svm, lda, qda), miscl,
                     sub.sampling(100, 2/3), seed = 1305)

ds2.bec <- benchmark(ds2, c(svm, lda, qda), miscl,
                     sub.sampling(100, 2/3), seed = 1305)


## Raw performance:

boxplot(ds1.bec$becp, ylim = c(0, 0.7),
        ylab = "Misclassification", xlab = "Candidate Algorithms",
        pch = 19, cex = 0.7, pars = list(medlwd = 1))

boxplot(ds2.bec$becp, ylim = c(0, 0.7),
        ylab = "Misclassification", xlab = "Candidate Algorithms",
        pch = 19, cex = 0.7, pars = list(medlwd = 1))

apply(ds1.bec$becp, 3, mean)
apply(ds2.bec$becp, 3, mean)


## Data set characterizations:

plot(ds.ch)

xtable::xtable(t(ds.ch[, , drop = TRUE]))

# Side-by-side layout for the two plots of the `becc` components.
# The grid functions are namespace-qualified so the script does not rely
# on another package having attached grid.
grid::pushViewport(grid::viewport(layout = grid::grid.layout(ncol = 2)))
print(plot(ds1.bec$becc),
      vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1))
print(plot(ds2.bec$becc),
      vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 2))


## BTree data:

ds1.btd <- as.bttreedata(ds1.bec)
ds2.btd <- as.bttreedata(ds2.bec)

par(mfrow = c(1, 2))
plot(ds1.btd$preference)
plot(ds2.btd$preference)


## BTree:

btd <- rbind(ds1.btd, ds2.btd)

tree1 <- bttree2(btd)

plot(tree1, abbreviate = FALSE)
worth(tree1)