Nothing
cvPAMR <- function(logX, groupings, DIR, params=list(seed=123, ncv=5, repeats=10,max_allowed_feat=500, n.threshold=50)) {
fs.method <- "pamr"
seed <- params$seed
ncv <- params$ncv
repeats <- params$repeats
max_allowed_feat <- params$max_allowed_feat
n.threshold <- params$n.threshold
jitter <- params$jitter
## introduce some minimal noise to make scaling etc. possible
## might be an issue if lots of zero values, e.g. in NGS experiments
## occur.
if(jitter) {
logX <- jitter(logX)
}
if(!is.null(DIR)) {
SUBDIR <- paste(DIR,fs.method,sep="/")
if(!file.exists(SUBDIR))
dir.create(SUBDIR)
#X <- list(groupings=list(groupings, paste(SUBDIR, "groupings.pdf", sep="/")))
fnames <- paste(SUBDIR, "/", names(groupings), ".pdf", sep="")
} else {
SUBDIR <- NULL
fnames <- paste(names(groupings),".pdf",sep="")
}
X <- lapply(1:length(groupings), function(i,groupings,fnames) list(groupings[[i]], fnames[i]), groupings=groupings, fnames=fnames)
names(X) <- names(groupings)
## use multicores if more than one group is to be classified
useparallel <- length(grep("package:(parallel|multicore)", search())>0)
if(length(X)>1 & useparallel) {
resPAM <- mclapply(X, cv_pamclass, logX=logX, nfold=ncv, n.threshold=n.threshold, seed=seed, max_allowed_feat=max_allowed_feat, repeats=repeats, mc.preschedule=TRUE, mc.cores=length(X))
} else {
resPAM <- lapply(X, cv_pamclass, logX=logX, nfold=ncv, n.threshold=n.threshold, seed=seed, max_allowed_feat=max_allowed_feat, repeats=repeats)
}
## make the feature table
featlist <- sapply(resPAM, extract_feat_pam, SUBDIR=SUBDIR)
if(!is.null(SUBDIR)) {
save(resPAM, X, logX, ncv, n.threshold, fs.method, SUBDIR, featlist, file=paste(SUBDIR, "env.RData", sep="/"))
}
################################
## ROC Curves for PAM
if(!is.null(SUBDIR)) {
pdf(paste(SUBDIR, "PAMR_ROC_Curves.pdf", sep="/")) #, width=10, height=10)
}
## save the performance results in here
performance <- list()
#par(mfrow=c(3,3))
for(i in 1:length(X)) {
## find the distribution type
clvec <- X[[i]][[1]]
raus <- which(is.na(clvec))
if(length(raus)>0) {
clvec <- clvec[-raus]
}
if(length(unique(clvec))==2) {
distribution <- "bernoulli"
} else {
distribution <- "multinomial"
}
#obj <- resPAM[[i]]$cv
rpam <- resPAM[[i]]
yhat <- yreal <- list()# NULL
it <- 0
for(ri in 1:repeats) {
obj <- rpam[[ri]]$cv
yhat.rep <- yreal.rep <- NULL
for(j in 1:length(obj$folds)) {
it <- it+1
fold <- obj$folds[[j]]
th <- rpam[[ri]]$tmin
#th <- resPAM[[i]]$tmin
thind <- which(obj$threshold==th)
if(distribution=="multinomial") {
xpred <- obj$prob[fold,,thind]
Ncl <- ncol(xpred)
kx <- 0:Ncl * 2
argmax <- apply(xpred, 1, function(x) which(x==max(x)))
allmax <- apply(xpred, 1, max)
pred <- kx[argmax] + allmax
# attach the prediction probability for the classes. Note that these are one probability for each class, i.e. a matrix with n.class columns for each fold/repeat
#fitted <- cbind(fitted, pred)
yhat.rep <- c(yhat.rep, pred)
yreal.rep <- c(yreal.rep,as.numeric(obj$y)[fold])
} else {
## get the curves for each fold, for all classes and the best threshold
#probabilities of classifying the sample in the positive class
yhat.rep <- c(yhat.rep, as.vector(obj$prob[fold, 2, thind]))
yreal.rep <- c(yreal.rep,as.numeric(as.character(obj$y))[fold])
}
## combine the repeats in one single table
#yhat <- cbind(yhat, yhat.rep)
#yreal <- cbind(yreal, yreal.rep)
yhat[[it]] <- yhat.rep
yreal[[it]] <- yreal.rep
}
}
if(distribution=="multinomial") {
aucs <- vector("numeric", length(yhat))
for(ki in 1:length(yreal)) {
grpx <- yreal[[ki]]
predx <- yhat[[ki]]
aucs[ki] <- multiclass.roc(grpx~predx)$auc
}
Y <- X[[i]][[1]]
## plot auc distribution
boxplot(aucs, ylim=c(0,1), main=c(paste(Ncl, "- class classification"), "multinomial model"))
axis(1, at=1, labels="multiclass AUC")
legend("bottomright", border="white", fill="white", legend=c("classes:",levels(Y)))
roc.curve <- NULL
auc <- signif(median(aucs,na.rm=TRUE),digits=3)
} else {
## make the roc curve
pred <- prediction(as.vector(unlist(yhat)), as.vector(unlist(yreal)))
roc.curve <- performance(pred, "tpr", "fpr")
## since we concatenate each yhat/yreal pair of values,
## peformance(pred,"auc")@y.values always has exactly one
## element, so we use this for auc calculation
#auc <- signif(performance(pred, "auc")@y.values[[1]], digits=3)
aucs <- unlist(performance(pred, "auc")@y.values)
auc <- signif(median(aucs), digits=3)
plot(roc.curve, avg="none", spread.estimate="none", sub=paste("AUC:", auc), main=names(X)[i], lwd=2.5)
roc_binterval(yhat, yreal)
}
## save the aucs and the roc curve performance object
performance[[names(X)[i]]] <- list(fitted=yhat, labels=yreal, aucs=aucs, auc=auc, roc.curve=roc.curve, classes=levels(factor(clvec)))
}
if(!is.null(SUBDIR)) {
dev.off()
}
list(res=resPAM, featlist=featlist, performance=performance)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.