# ------------------------------------------------------------------------------ # -------- Example: ----------------------- # -------- Analysis of (a subset of) the prostate cancer ----------------------- # -------- data (Petricoin et al., 2002) ----------------------- # -------- using blockwise boosting ----------------------- # ------------------------------------------------------------------------------ # data import (the data must be in the current working directory) Intensity <- read.table("reducedData.txt",header=T,check.names=F) # m/z values mz <- as.numeric(names(Intensity)) # response class labels (cf. Petricoin et al., 2002) response <- c(rep(1,253),rep(2,69)) # plot mean intensity curves (apparently the data is already baseline corrected) plot(mz,apply(Intensity[response==1,],2,mean),type="l",ylab="Intensity", xlab="Mass/Charge") lines(mz,apply(Intensity[response==2,],2,mean),col=2,lty=2) # get the BlockBoost functions source("http://www.statistik.lmu.de/~gertheiss/download/logitBlockBoost.r") # since input to BlockBoost needs to be a matrix Intensity <- as.matrix(Intensity) # for illustration run BlockBoost with block size k=20 and penalty lambda=10^2 BBnew <- logitblockboost(xlearn=Intensity, ylearn=response-1, xtest=Intensity, k=20, penalty="diff", lambda=10^2, mfinal=40) # and plot the beta vector estimated after 20 and 30 iterations oldpar <- par(mfrow=c(2,1)) plot(mz,BBnew$Betahat[,20],type="l",xlab="Mass/Charge", ylab=expression(beta)) title("20 iterations") plot(mz,BBnew$Betahat[,30],type="l",xlab="Mass/Charge", ylab=expression(beta)) title("30 iterations") par(oldpar) # alternatively k, lambda and the number of # iterations can/should be determined via (3-fold) # cross validation (takes some time) k <- c(10,20,30) loglambda <- 0:4 mfinal <- 40 CVBlockBoost <- cvlogitblockboost(xlearn=Intensity, ylearn=response-1, k=k, lambda=10^loglambda, mfinal=mfinal, K=3, penalty="diff", printFold=F) # plot the results oldpar <- par(mfrow=c(1,3)) persp(x=1:mfinal,y=loglambda,z=t(CVBlockBoost[1,,]),theta=40,ticktype="detailed", xlab="Boosting Iteration",ylab="log(lambda)",zlab="CV Error") title("k = 10") persp(x=1:mfinal,y=loglambda,z=t(CVBlockBoost[2,,]),theta=40,ticktype="detailed", xlab="Boosting Iteration",ylab="log(lambda)",zlab="CV Error") title("k = 20") persp(x=1:mfinal,y=loglambda,z=t(CVBlockBoost[3,,]),theta=40,ticktype="detailed", xlab="Boosting Iteration",ylab="log(lambda)",zlab="CV Error") title("k = 30") par(oldpar)