Nothing
ensemblePredict <-
function (formulaList,trainData,testData=NULL, predictType = c("prob", "linear"),type = c("LOGIT", "LM","COX","SVM"),Outcome=NULL,nk = 0)
{
medianWeightedI <- function(x, w) {
w <- w[order(x)]
x <- x[order(x)]
x[which.min(abs(filter(c(0,cumsum(w)/sum(w)), c(.5,.5), sides=1)[-1] - 0.5))]
}
# cat("Median\n")
# print(formulaList);
if (length(formulaList)==0)
{
cat("No formulas\n");
warning("No formulas");
result <- list(ensemblePredict=NULL,
medianKNNPredict=NULL,predictions=NULL,KNNpredictions=NULL,wPredict=NULL)
return (result)
}
if (is.null(Outcome))
{
nk = -1;
# for (i in 1:length(formulaList))
# {
# formulaList[i] <- paste(formulaList[i],"+1")
# }
varlist <- attr(terms(formula(formulaList[1])),"variables")
dependent <- as.character(varlist[[2]])
if (length(dependent)==3)
{
vOutcome = dependent[3];
}
else
{
vOutcome = dependent[1];
}
}
else
{
vOutcome=Outcome;
for (i in 1:length(formulaList))
{
if (formulaList[i] != "=-=End=-=")
{
if (gregexpr(pattern ='~',as.character(formulaList[i]))[1]>0)
{
feat <- unlist(strsplit(as.character(formulaList[i]),"[~]"));
if (nchar(feat[2])>0)
{
formulaList[i] <- feat[2];
}
else
{
formulaList[i] <- "1";
}
}
}
}
}
EquTrainSet <- trainData;
minTrainSamples <- nrow(trainData);
maxTrainSamples = minTrainSamples;
casesample <- NULL;
controlsample <- NULL;
noequalSets <- FALSE;
nrowcases <- minTrainSamples
nrowcontrols <- minTrainSamples
mweights <- numeric();
msize <- numeric();
if ((type == "LOGIT") || (type == "COX"))
{
casesample = subset(trainData,get(vOutcome) == 1);
controlsample = subset(trainData,get(vOutcome) == 0);
trainCaseses <- casesample;
trainControls <- controlsample;
nrowcases <- nrow(casesample);
nrowcontrols <- nrow(controlsample);
minTrainSamples <- min(c(nrow(casesample),nrow(controlsample)));
maxTrainSamples <- max(c(nrow(casesample),nrow(controlsample)));
noequalSets <- (minTrainSamples < 0.80*maxTrainSamples);
# if (noequalSets) warning("Case and Control sets no equal. Setting the size of the control and cases equal\n");
}
if (nk==0)
{
nk = 2*as.integer(sqrt(minTrainSamples/2)) + 1;
}
if (is.null(testData))
{
medianKNN=NULL;
out=NULL;
KNNpredictions=NULL;
outKNN=NULL;
medianout <- vector(mode="numeric",length = nrow(trainData));
wmedpredict <- medianout;
cat("\n");
for ( n in 1:nrow(trainData))
{
if ((n %% 10)==0) cat(".");
mp <- ensemblePredict(formulaList,trainData[-n,],trainData[n,],predictType,type,Outcome,nk = -1)
medianout[n] = mp$ensemblePredict[1]
wmedpredict[n] = mp$wPredict[1]
out <- rbind(out,cbind(trainData[n,vOutcome],mp$predictions[1,-1]));
}
rownames(out) <- rownames(trainData);
cat("\n");
}
else
{
wmedpred <- numeric(nrow(testData))
swts <- 0;
theoutcome <- testData[,vOutcome];
out <- NULL;
if (!is.null(Outcome))
{
if (nchar(formulaList[[1]])>0)
{
ftmp <- formula(paste(Outcome,"~ 1+",formulaList[[1]]))
}
else
{
ftmp <- formula(paste(Outcome,"~ 1"));
}
}
else
{
ftmp <- formula(formulaList[1])
}
if (noequalSets)
{
if (maxTrainSamples > nrowcases) trainCaseses <- casesample[sample(1:nrowcases,maxTrainSamples,replace=TRUE),]
if (maxTrainSamples > nrowcontrols) trainControls <- controlsample[sample(1:nrowcontrols,maxTrainSamples,replace=TRUE),]
EquTrainSet <- rbind(trainCaseses,trainControls)
}
if (nk>0)
{
outKNN <- cbind(theoutcome,getKNNpredictionFromFormula(ftmp,EquTrainSet,testData,Outcome=vOutcome,nk)$binProb)
rownames(outKNN) <- rownames(testData);
}
else
{
outKNN <- NULL;
medianKNN <- NULL;
}
bestmodel <- modelFitting(ftmp,EquTrainSet,type,fitFRESA=TRUE)
if (inherits(bestmodel, "try-error"))
{
curprediction <- numeric(nrow(testData));
wmedpred <- curprediction;
swts <- 0;
}
else
{
curprediction <- predict.fitFRESA(bestmodel,testData,predictType);
trainOutcome <- EquTrainSet[,vOutcome];
varOutcome <- var(trainOutcome);
trainPrediction <- predict.fitFRESA(bestmodel,EquTrainSet,predictType);
residual <- as.vector(abs(trainPrediction-trainOutcome));
Rwts <- (varOutcome-(mean(residual))^2)/varOutcome; #Correlation
if (Rwts<=0) Rwts <- 1.0e-4;
wmedpred <- Rwts*curprediction;
swts <- Rwts;
mweights <- c(mweights,Rwts);
msize <- c(msize,length(bestmodel$coef));
}
totSamples <- cbind(theoutcome,curprediction);
rownames(totSamples) <- rownames(testData);
out <- totSamples;
if (length(formulaList)>1)
{
for (i in 2:length(formulaList))
{
if (formulaList[i] != "=-=End=-=")
{
if (!is.null(Outcome))
{
if (nchar(formulaList[[i]])>0)
{
ftmp <- formula(paste(Outcome,"~1+",formulaList[[i]]));
}
else
{
ftmp <- formula(paste(Outcome,"~1"));
}
}
else
{
ftmp <- formula(formulaList[i]);
}
if (noequalSets && (runif(1)<0.20))
{
if (maxTrainSamples > nrowcases) trainCaseses <- casesample[sample(1:nrowcases,maxTrainSamples,replace=TRUE),]
if (maxTrainSamples > nrowcontrols) trainControls <- controlsample[sample(1:nrowcontrols,maxTrainSamples,replace=TRUE),]
EquTrainSet <- rbind(trainCaseses,trainControls)
trainOutcome <- EquTrainSet[,vOutcome];
varOutcome <- var(trainOutcome);
}
if (nk>0)
{
outKNN <- cbind(outKNN,getKNNpredictionFromFormula(ftmp,EquTrainSet,testData,Outcome=vOutcome,nk)$binProb);
}
if (length(all.vars(ftmp))>1)
{
fm <- modelFitting(ftmp,EquTrainSet,type,fitFRESA=TRUE)
if (inherits(fm, "try-error"))
{
if (noequalSets)
{
if (maxTrainSamples > nrowcases) trainCaseses <- casesample[sample(1:nrowcases,maxTrainSamples,replace=TRUE),]
if (maxTrainSamples > nrowcontrols) trainControls <- controlsample[sample(1:nrowcontrols,maxTrainSamples,replace=TRUE),]
EquTrainSet <- rbind(trainCaseses,trainControls)
trainOutcome <- EquTrainSet[,vOutcome];
varOutcome <- var(trainOutcome);
}
fm <- modelFitting(ftmp,EquTrainSet,type,fitFRESA=TRUE)
}
curprediction <- predict.fitFRESA(fm,testData,predictType);
trainPrediction <- predict.fitFRESA(fm,EquTrainSet,predictType);
residual <- as.vector(abs(trainPrediction-trainOutcome));
Rwts <- (varOutcome-(mean(residual))^2)/varOutcome; #Correlation
if (Rwts<=0) Rwts <- 1.0e-4;
wmedpred <- wmedpred + Rwts*curprediction;
swts <- swts + Rwts;
mweights <- c(mweights,Rwts);
msize <- c(msize,length(fm$coef));
out <- cbind(out,curprediction);
}
else
{
warning("No formula");
}
}
}
if (ncol(out)<3)
{
out <- cbind(out,curprediction);
}
out <- as.data.frame(out);
# medianout <- rowMedians(out[,-1],na.rm = TRUE);
# print(mweights)
# print(msize)
mweights <- mweights - 0.1*msize/max(msize);
medianout <- apply(out[,-1],1,medianWeightedI,w=mweights);
wmedpredict <- wmedpred;
if (swts>0) wmedpredict <- wmedpred/swts;
if (nk>0)
{
outKNN <- as.data.frame(outKNN);
medianKNN <- rowMedians(outKNN[,-1],na.rm = TRUE);
}
}
else
{
out <- as.data.frame(out);
medianout <- out[,-1];
wmedpredict <- medianout;
if (nk>0)
{
outKNN <- as.data.frame(outKNN);
medianKNN <- outKNN[,-1];
}
}
}
result <- list(ensemblePredict=medianout,
medianKNNPredict=medianKNN,predictions=out,KNNpredictions=outKNN,wPredict=wmedpredict)
return (result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.