# ------------------------------------------------------------------------------ # -------- Example: ------------------------------------ # -------- Analysis of the canadian weather ------------------------------------ # -------- data (Ramsay & Silverman, 2005) ------------------------------------ # -------- using blockwise boosting ------------------------------------ # ------------------------------------------------------------------------------ # Get the data library(fda) tempav <- t(daily$tempav) annualprec <- log10(colSums(daily$precav)) # and BlockBoost functions source("http://www.statistik.lmu.de/~gertheiss/download/BlockBoost.r") # To get a rough idea of the right penalty we may employ # (leaving one out / generalized) cross validation of a # (generalized) ridge regression with difference penalty. genRidge <- function(y,x,omega,lambda) { # the generalized ridge regression with penalty matrix omega Hh <- (chol2inv(chol(t(x)%*%x + lambda*omega)))%*%t(x) betahat <- Hh%*%y H <- x%*%Hh Ih <- 1 - diag(H) yhat <- H%*%y yyhat <- y - yhat cv <- sum((yyhat/Ih)^2) gcv <- sum(yyhat^2)/sum(Ih)^2 return(list("betahat"=betahat,"yhat"=yhat,"res"=yyhat,"cv"=cv,"gcv"=gcv)) } # the penalty matrix p <- ncol(tempav) D1 <- cbind(diag(-1,p-1),0) + cbind(0,diag(1,p-1)) D1 <- rbind(c(1,rep(0,p-1)),D1,c(rep(0,p-1),1)) omega <- t(D1)%*%D1 omega <- cbind(0,omega) omega <- rbind(0,omega) # leaving one out cross validation loglam <- seq(1,13,0.5) cvScore <- numeric(length(loglam)) for (ilam in seq(along=loglam)) { lambda <- 10^loglam[ilam] testi <- genRidge(y=annualprec,x=cbind(1,tempav),omega=omega,lambda=lambda) cvScore[ilam] <- testi$cv } # We look at the results, and choose lambda=10^7 for the use in BlockBoost plot(loglam, cvScore, type="b", xlab=expression(log[10](lambda)), ylab="Cross-validation score") # BlockBoost with first difference penalty, block size k=30, # and selection criterion CV: BBnew <- blockboost(x=tempav,y=annualprec,k=30,penalty="diff", crit="CV",lambda=10^7,mfinal=50,constant=T) # We look at AIC values, plot(BBnew$aic,ylab="AIC",xlab="boosting iteration") # and choose 9 boosting iterations (the sparser model). plot(BBnew$Betahat[,9],type="l",xlab="day", ylab="regression coefficient",lwd=2) # In order to evaluate variability of BlockBoost we need # a function for the use in the boot package: BBboot <- function(Data,inds){ y <- Data[inds,1] x <- Data[inds,-1] BBb <- blockboost(x=x,y=y,k=30,penalty="diff",crit="CV",lambda=10^7, mfinal=50,constant=T) print(which.min(BBb$aic)) return(as.numeric(BBb$Betahat[,which.min(BBb$aic)])) } # the bootstrap based on 2000 samples (takes several hours!) library(boot) BBB <- boot(cbind(annualprec,tempav),BBboot,R=2000) CI <- matrix(NA,p,2) for (j in 1:p) { cij <- boot.ci(BBB,conf=0.9,index=j,type="perc") CI[j,1] <- cij$percent[1,4] CI[j,2] <- cij$percent[1,5] } # plot the results daytime <- 1:365 plot(daytime,BBnew$Betahat[,9],type="l",xlab="day", ylab="regression coefficient",lwd=2,ylim=c(-0.001,0.0016)) abline(h=0,lty=2) lines(daytime,CI[,1],lty=3,lwd=2) lines(daytime,CI[,2],lty=3,lwd=2)