# Nothing  (stray non-code line left by the web scrape; commented out so the script parses)
## ----echo=FALSE,results='hide'----------------------------
# Vignette setup: load knitr and configure console and chunk output options.
library("knitr")
options(
  prompt = "R> ",
  continue = " ",
  width = 60,
  useFancyQuotes = FALSE
)
# One opts_chunk$set call covering all chunk defaults (opts_chunk$set merges
# options, so combining the two original calls is behaviorally identical).
opts_chunk$set(
  tidy = TRUE,
  highlight = FALSE,
  background = "white",
  dev = c("pdf", "postscript"),
  fig.path = "cancerfigure/"
)
## ----message=FALSE, eval=FALSE----------------------------
## NOTE(review): eval=FALSE chunk — not executed when the vignette is built.
## It documents how the expression matrix `dat` (100x... after filtering) and
## the +/-1 ER-status response `y` were prepared from GEO series GSE20194.
## Steps: read series matrix, transpose to samples-by-probes, drop the
## trailing "!series_matrix_table_end" column, read ER status from the Excel
## sample sheet, keep the 3000 probes with the largest |t| for ER status,
## then standardize columns.
# #The data files below were downloaded on June 1, 2016
# require("gdata")
# bc <- t(read.delim("GSE20194_series_matrix.txt.gz",sep="",header=FALSE,skip=80))
# colnames(bc) <- bc[1,]
# bc <- bc[-1, -c(1, 2)]
# ###The last column is empty with variable name !series_matrix_table_end, thus omitted
# bc <- bc[, -22284]
# mode(bc) <- "numeric" ### convert character to numeric
# dat1 <- read.xls("GSE20194_MDACC_Sample_Info.xls", sheet=1, header=TRUE)
# y <- dat1$characteristics..ER_status
# y <- ifelse(y=="P", 1, -1)
# table(y)
# res <- rep(NA, dim(bc)[2])
## NOTE(review): `1:dim(bc)[2]` is the 1:length() anti-pattern — fine here
## because bc is known non-empty, but seq_len(ncol(bc)) would be safer.
# for(i in 1:dim(bc)[2])
# res[i] <- abs(t.test(bc[,i]~y)$statistic)
# ### find 3000 largest absolute value of t-statistic
# tmp <- order(res, decreasing=TRUE)[1:3000]
# dat <- bc[, tmp]
# ### standardize variables
# dat <- scale(dat)
## ----message=FALSE----------------------------------------
# Simulation settings shared by the chunks below. Element k of s, nu,
# ctr.trun, bsttype, bsttype1, rbsttype and rbsttype1 all refer to the
# same (k-th) loss family.
nrun <- 100                    # number of Monte Carlo replications
per <- c(0, 0.05, 0.10, 0.15)  # label-contamination proportions
learntype <- "ls"              # base learner ("tree" or "ls"; "ls" chosen)
tuning <- "error"              # tuning criterion for cv.bst/cv.rbst
n.cores <- 4                   # workers used by cross-validation
plot.it <- TRUE                # draw summary boxplots at the end?
### Robust tuning parameters used in the bst/rbst functions
s <- c(0.9, 1.01, 0.5, -0.2, 0.8, -0.5, -0.2)
nu <- c(0.01, 0.1, 0.01, 0.1, 0.1, 0.1, 0.1)
m <- 100                       # number of boosting iterations
### Truncate the predicted values in each boosting iteration?
### (TRUE for the first family only)
ctr.trun <- rep(c(TRUE, FALSE), c(1, 6))
### Loss families passed to the bst function ...
bsttype <- c("closs", "gloss", "qloss", "binom", "binom", "hinge", "expo")
### ... and their corresponding display labels
bsttype1 <- c("ClossBoost", "GlossBoost", "QlossBoost", "LogitBoost",
              "LogitBoost", "HingeBoost", "AdaBoost")
### Robust loss families passed to the rbst function ...
rbsttype <- c("closs", "gloss", "qloss", "tbinom", "binomd", "thinge", "texpo")
### ... and their corresponding display labels
rbsttype1 <- c("ClossBoostQM", "GlossBoostQM", "QlossBoostQM", "TLogitBoost",
               "DlogitBoost", "THingeBoost", "TAdaBoost")
## ----cancer, fig.show='hold', eval=FALSE------------------
## NOTE(review): eval=FALSE chunk — not executed when the vignette is built.
## Structure: for each of the 7 loss families (k), run `nrun` replications
## (ii); in each replication draw a balanced 50/50 training set, then for
## each contamination level (i) flip that proportion of training labels and
## fit bst() (non-robust) and rbst() (robust), selecting mstop by 5-fold CV.
## Test error, selected-variable counts, and mstop values are accumulated in
## the err/nvar/mstop matrices and summarized after the last replication.
## NOTE(review): depends on `dat` and `y` from the eval=FALSE data-prep
## chunk above, and on the settings chunk (s, nu, m, ctr.trun, per, ...).
# summary7 <- function(x) c(summary(x), sd=sd(x))
# ptm <- proc.time()
# for(k in 1:7){ ### k controls which family in bst, and rfamily in rbst
# err.m1 <- err.m2 <- nvar.m1 <- nvar.m2 <- errbest.m1 <- errbest.m2 <- matrix(NA, ncol=4, nrow=nrun)
# mstopbest.m1 <- mstopbest.m2 <- mstopcv.m1 <- mstopcv.m2 <- matrix(NA, ncol=4, nrow=nrun)
# colnames(err.m1) <- colnames(err.m2) <- c("cont-0%", "cont-5%", "cont-10%", "cont-15%")
# colnames(mstopcv.m1) <- colnames(mstopcv.m2) <- colnames(err.m1)
# colnames(nvar.m1) <- colnames(nvar.m2) <- colnames(err.m1)
# colnames(errbest.m1) <- colnames(errbest.m2) <- colnames(err.m1)
# colnames(mstopbest.m1) <- colnames(mstopbest.m2) <- colnames(err.m1)
# for (ii in 1:nrun){
# set.seed(1000+ii)
# trid <- c(sample(which(y==1))[1:50], sample(which(y==-1))[1:50])
# dtr <- dat[trid, ]
# dte <- dat[-trid, ]
# ytrold <- y[trid]
# yte <- y[-trid]
# ### number of patients/no. variables in training and test data
# dim(dtr)
# dim(dte)
# ###randomly contaminate data
# ntr <- length(trid)
# set.seed(1000+ii)
# con <- sample(ntr)
# for(j in 1){ ### controls learntype
# for(i in 1:4){ ### i controls how many percentage of data contaminated
# ytr <- ytrold
# percon <- per[i]
# ### randomly flip labels of the samples in training set according to pre-defined contamination level
# if(percon > 0){
# ji <- con[1:(percon*ntr)]
# ytr[ji] <- -ytrold[ji]
# }
# dat.m1 <- bst(x=dtr, y=ytr, ctrl = bst_control(mstop=m, center=FALSE, trace=FALSE, nu=nu[k], s=s[k], trun=ctr.trun[k]), family = bsttype[k],
# learner = learntype[j])
# err1 <- predict(dat.m1, newdata=dte, newy=yte, type="error")
# err1tr <- predict(dat.m1, newdata=dtr, newy=ytr, type="loss")
# ### cross-validation to select best boosting iteration
# set.seed(1000+ii)
# cvm1 <- cv.bst(x=dtr, y=ytr, K=5, n.cores=n.cores, ctrl = bst_control(mstop=m, center=FALSE, trace=FALSE, nu=nu[k], s=s[k], trun=ctr.trun[k]), family = bsttype[k],
# learner = learntype[j], main=bsttype[k], type=tuning, plot.it=FALSE)
# optmstop <- max(10, which.min(cvm1$cv))
# err.m1[ii, i] <- err1[optmstop]
# nvar.m1[ii, i] <- nsel(dat.m1, optmstop)[optmstop]
# errbest.m1[ii, i] <- min(err1)
# mstopbest.m1[ii, i] <- which.min(err1)
# mstopcv.m1[ii, i] <- optmstop
# dat.m2 <- rbst(x=dtr, y=ytr, ctrl = bst_control(mstop=m, iter=100, nu=nu[k], s=s[k], trun=ctr.trun[k], center=FALSE, trace=FALSE), rfamily = rbsttype[k], learner = learntype[j])
# err2 <- predict(dat.m2, newdata=dte, newy=yte, type="error")
# err2tr <- predict(dat.m2, newdata=dtr, newy=ytr, type="loss")
# ### cross-validation to select best boosting iteration
# set.seed(1000+ii)
# cvm2 <- cv.rbst(x=dtr, y=ytr, K=5, n.cores=n.cores, ctrl = bst_control(mstop=m, iter=100, nu=nu[k], s=s[k], trun=ctr.trun[k], center=FALSE, trace=FALSE), rfamily = rbsttype[k],
# learner = learntype[j], main=rbsttype[k], type=tuning, plot.it=FALSE)
# optmstop <- max(10, which.min(cvm2$cv))
# err.m2[ii, i] <- err2[optmstop]
# nvar.m2[ii, i] <- nsel(dat.m2, optmstop)[optmstop]
# errbest.m2[ii, i] <- min(err2)
# mstopbest.m2[ii, i] <- which.min(err2)
# mstopcv.m2[ii, i] <- optmstop
# }
# }
# if(ii %% nrun ==0){
# if(bsttype[k]%in% c("closs", "gloss", "qloss"))
# cat(paste("\nbst family ", bsttype1[k], ", s=", s[k], ", nu=", nu[k], sep=""), "\n")
# if(bsttype[k]%in% c("binom", "hinge", "expo"))
# cat(paste("\nbst family ", bsttype1[k], ", nu=", nu[k], sep=""), "\n")
# cat("best misclassification error from bst\n")
# print(round(apply(errbest.m1, 2, summary7),4))
# cat("CV based misclassification error from bst\n")
# print(round(apply(err.m1, 2, summary7),4))
# cat("best mstop with best misclassification error from bst\n")
# print(round(apply(mstopbest.m1, 2, summary7),0))
# cat("best mstop with CV from bst\n")
# print(round(apply(mstopcv.m1, 2, summary7), 0))
# cat("nvar from bst\n")
# print(round(apply(nvar.m1, 2, summary7),1))
#
## NOTE(review): unlike the bst report above, the rbst header below always
## prints s[k], even for the families where bst omits it — presumably
## intentional because s is meaningful for all robust families; confirm.
# cat(paste("\nrbst family ", rbsttype1[k], ", s=", s[k], ", nu=", nu[k], sep=""), "\n")
# cat("\nbest misclassification error from rbst\n")
# print(round(apply(errbest.m2, 2, summary7),4))
# cat("CV based misclassification error from rbst\n")
# print(round(apply(err.m2, 2, summary7),4))
# cat("best mstop with best misclassification error from rbst\n")
# print(round(apply(mstopbest.m2, 2, summary7),0))
# cat("best mstop with CV from rbst\n")
# print(round(apply(mstopcv.m2, 2, summary7),0))
# cat("nvar from rbst\n")
# print(round(apply(nvar.m2, 2, summary7),1))
# res <- list(err.m1=err.m1, nvar.m1=nvar.m1,
# errbest.m1=errbest.m1,
# mstopbest.m1=mstopbest.m1,
# mstopcv.m1=mstopcv.m1,
# err.m2=err.m2, nvar.m2=nvar.m2,
# errbest.m2=errbest.m2,
# mstopbest.m2=mstopbest.m2,
# mstopcv.m2=mstopcv.m2,
# s=s[k], nu=nu[k], trun=ctr.trun[k], family = bsttype[k], rfamily = rbsttype[k])
# if(plot.it){
## NOTE(review): par(mfrow=c(2, 1)) allows only 2 panels but 4 boxplots are
## drawn, so each pair of plots overwrites the layout — verify intended.
# par(mfrow=c(2, 1))
# boxplot(err.m1, main="Misclassification error", subset="", sub=bsttype1[k])
# boxplot(err.m2, main="Misclassification error", subset="", sub=rbsttype1[k])
# boxplot(nvar.m1, main="No. variables", subset="", sub=bsttype1[k])
# boxplot(nvar.m2, main="No. variables", subset="", sub=rbsttype1[k])
# }
# check <- FALSE
# if(check){
# par(mfrow=c(3, 1))
# title <- paste('percentage of contamination ', percon, sep='')
# plot(err2tr, main=title, ylab="Loss value", xlab="Iteration", type="l", lty="dashed", col="red")
# points(err1tr, type="l", lty="solid", col="black")
# legend("topright", c(bsttype1[k], rbsttype1[k]), lty=c("solid", "dashed"), col=c("black","red"))
# plot(err2, main=title, ylab="Misclassification error", xlab="Iteration", type="l", lty="dashed", col="red")
# points(err1, type="l")
# legend("bottomright", c(bsttype1[k], rbsttype1[k]), lty=c("solid", "dashed"), col=c("black","red"))
# plot(nsel(dat.m2, m), main=title, ylab="No. variables", xlab="Iteration", lty="dashed", col="red", type="l")
# points(nsel(dat.m1, m), ylab="No. variables", xlab="Iteration", lty="solid", type="l", col="black")
# legend("bottomright", c(bsttype1[k], rbsttype1[k]), lty=c("solid", "dashed"), col=c("black","red"))
# }
# }
# }
# }
# print(proc.time()-ptm)
## ----sessionInfo------------------------------------------
# Record the R session (R version, attached packages) for reproducibility.
sessionInfo()
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.
# (Footer text from the hosting site, commented out so the script parses.)