### Supplemental R-Code for ### ### Exploratory Analysis of Benchmark Experiments: An Interactive ### Approach ### ### by Manuel J. A. Eugster and Friedrich Leisch. library(benchmark) ### Setup: ########################################################### ### Data: data(cars) str(cars) cars <- rbind(cars, c(30, 8)) plot(dist ~ speed, data=cars, xlab='Speed (mph)', ylab='Stopping distance (ft)', pch=19, cex=0.6) text(31, 14, 'modern car', pos=2) ### Algorithms: ## lm: mylm <- list(fit=function(d){lm(dist ~ speed, data=d)}, predict=predict.lm, plot=function(m, d=NULL, ..., col=2){abline(m, ..., col=col)}) ## rlm: require(MASS) myrlm <- list(fit=function(d){rlm(dist ~ speed, data=d)}, predict=MASS:::predict.rlm, plot=function(m, d=NULL, ..., col=3){abline(m, ..., col=col)}) ## cubic smoothing spline: myspl <- list(fit=function(d){smooth.spline(d)}, predict=function(...){stats:::predict.smooth.spline(...)$y}, plot=function(m, d=NULL, ..., col=4){lines(m, ..., col=col)}) ## lowess smoother: myloess <- list(fit=function(d){loess(dist ~ speed, data=d, control=loess.control(surface='direct'))}, predict=stats:::predict.loess, plot=function(m, d, ..., col=5) { a <- sort(d$speed, index.return=TRUE) x <- a$x y <- fitted(m)[a$ix] lines(x, y, ..., col=col) }) algorithms <- list(lm=mylm, rlm=myrlm, spl=myspl, loess=myloess) ### Execution: ####################################################### set.seed(1102) B <- 100 mse <- matrix(NA, nrow=B, ncol=length(algorithms), dimnames=list(NULL, names(algorithms))) lmse <- matrix(NA, nrow=B, ncol=length(algorithms), dimnames=list(NULL, names(algorithms))) ltime <- matrix(NA, nrow=B, ncol=length(algorithms), dimnames=list(NULL, names(algorithms))) beparts <- list() for ( i in seq_len(B) ) { b <- sample(nrow(cars), replace=TRUE) beparts[[i]] <- list(b=b, models=list()) for ( a in names(algorithms) ) { alg <- algorithms[[a]] ltime[i,a] <- sum(sytem.time(m <<- alg$fit(cars[b,]))) p <- alg$predict(m, cars[b,'speed',drop=FALSE]) lmse[i,a] <- mean((p - cars[b,'dist'])^2) p <- alg$predict(m, cars[-b,'speed',drop=FALSE]) mse[i,a] <- mean((p - cars[-b,'dist'])^2) beparts[[i]][['models']][[a]] <- m } } save(mse, lmse, beparts, file='be.RData') ### Analysis: ######################################################## ### Static benchplot: beplot(as.bench(mse), lines.show=TRUE, col=c('green', 'blue', 'cyan', 'magenta'), lines.col=rep('black', 4), ylab='MSE', dots.cex=0.5)