################################################################################
# Script for the real data analysis described in Boulesteix et al. (2008)
# Anne-Laure Boulesteix, February 5, 2008
################################################################################
# This script reproduces the real data analysis described in Boulesteix,
# Porzelius and Daumer (2008).
# It uses the package MAclinical, which requires the installation of the
# packages party, plsgenomics and st.
# The data set 'RosettaBreastData.RData' is borrowed from the package
# DENMARKLAB.
# The data set contained in the file 'clinical.csv' is a copy in csv format of
# supplementary table 1, available from the Nature website as supplementary
# information to the article by van 't Veer et al. (2002).

library(MAclinical)

# Clinical covariates --------------------------------------------------------

# 'clinical.csv' is semicolon-separated.
clinical <- read.csv("clinical.csv", sep = ";")
clinical <- clinical[, -11]                  # drop column 11
clinical <- clinical[1:78, ]                 # keep the first 78 rows
clinical <- clinical[, c(2, 3, 6, 7, 8, 9)]  # keep six of the remaining columns

# Columns 5 and 6 are dichotomised (1 if > 0, else 0); columns 4, 5 and 6 are
# then stored as factors.
clinical[, 5] <- as.numeric(clinical[, 5] > 0)
clinical[, 6] <- as.numeric(clinical[, 6] > 0)
clinical[, 4] <- factor(clinical[, 4])
clinical[, 5] <- factor(clinical[, 5])
clinical[, 6] <- factor(clinical[, 6])

# Response and expression data -----------------------------------------------

# The .RData file provides 'surv.resp' and 'rosetta.data.imp'.
load("RosettaBreastData.RData")

y <- surv.resp
y <- y[1:78]
# Transpose, then keep the first 78 rows so that x lines up with y and z.
x <- t(rosetta.data.imp)[1:78, ]
z <- clinical

# Learning sets and per-iteration variable ranking ---------------------------

# NOTE(review): no RNG seed is set in this script, so the MCCV splits (and
# everything derived from them) presumably differ between runs -- confirm
# whether a seed should be fixed for exact reproducibility.
learningsets <- generate.learningsets(
  n = 78, method = "MCCV", niter = 100, nlearn = 62
)

# One row per learning set, one column per column of x. The matrix was
# previously allocated as 200 x 4348 although only 100 learning sets are
# generated, which left rows 101-200 filled with zeros. Deriving the size
# from the data keeps varsel consistent with learningsets and x.
varsel <- matrix(0, nrow(learningsets), ncol(x))
for (i in seq_len(nrow(learningsets))) {
  learn_idx <- learningsets[i, ]
  # Rank the columns of x by decreasing absolute value of studentt.stat(),
  # computed on the i-th learning set only; the labels are passed as y + 1.
  varsel[i, ] <- order(
    abs(studentt.stat(X = x[learn_idx, ], L = y[learn_idx] + 1)),
    decreasing = TRUE
  )
}

# Classifier plsrf_xz_pv, nbgene = 20, 50, 100 -------------------------------

plsrf_xz_pv20 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 20, classifier = plsrf_xz_pv, ncomp = 0:3
)
plsrf_xz_pv50 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 50, classifier = plsrf_xz_pv, ncomp = 0:3
)
plsrf_xz_pv100 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 100, classifier = plsrf_xz_pv, ncomp = 0:3
)
# Remaining testclass() runs. x, z, y, learningsets and varsel are created
# earlier in the script. The calls are kept in their original order.
# NOTE(review): some classifiers presumably draw random numbers, in which case
# reordering the calls would change the results -- confirm before rearranging.

# Classifier plsrf_xz_pv, nbgene = 200 ---------------------------------------

plsrf_xz_pv200 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 200, classifier = plsrf_xz_pv, ncomp = 0:3
)

# Classifier plsrf_xz, nbgene = 20, 50, 100, 200 -----------------------------

plsrf_xz20 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 20, classifier = plsrf_xz, ncomp = 0:3
)
plsrf_xz50 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 50, classifier = plsrf_xz, ncomp = 0:3
)
plsrf_xz100 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 100, classifier = plsrf_xz, ncomp = 0:3
)
plsrf_xz200 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 200, classifier = plsrf_xz, ncomp = 0:3
)

# Classifier plsrf_x_pv, nbgene = 20, 50, 100, 200 ---------------------------

plsrf_x_pv20 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 20, classifier = plsrf_x_pv, ncomp = 0:3
)
plsrf_x_pv50 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 50, classifier = plsrf_x_pv, ncomp = 0:3
)
plsrf_x_pv100 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 100, classifier = plsrf_x_pv, ncomp = 0:3
)
plsrf_x_pv200 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 200, classifier = plsrf_x_pv, ncomp = 0:3
)

# Classifier plsrf_x, nbgene = 20, 50, 100, 200 ------------------------------

plsrf_x20 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 20, classifier = plsrf_x, ncomp = 0:3
)
plsrf_x50 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 50, classifier = plsrf_x, ncomp = 0:3
)
plsrf_x100 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 100, classifier = plsrf_x, ncomp = 0:3
)
plsrf_x200 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 200, classifier = plsrf_x, ncomp = 0:3
)

# Classifier rf_z: called without varsel, nbgene or ncomp --------------------

rf <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, classifier = rf_z
)

# Classifier svm_x, nbgene = 20, 50, 100, 200 (no ncomp argument) ------------

svm_x20 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 20, classifier = svm_x
)
svm_x50 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 50, classifier = svm_x
)
svm_x100 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 100, classifier = svm_x
)
svm_x200 <- testclass(
  x = x, z = z, y = y, learningsets = learningsets, varsel = varsel,
  nbgene = 200, classifier = svm_x
)
# Classifier logistic_z: evaluated on the same learning sets, called without
# varsel, nbgene or ncomp.
logistic <- testclass(
  x = x,
  z = z,
  y = y,
  learningsets = learningsets,
  classifier = logistic_z
)