baggedModelS <-
function(modelFormulas,data,type=c("LM","LOGIT","COX"),Outcome=NULL,timeOutcome=NULL)
{
type <- match.arg(type)
formulaLoops <- 1;
observations <- nrow(data);
avgLogPvalues <- NULL;
forder <- NULL;
# cat(length(modelFormulas)," :Bagging\n",modelFormulas[1],"\n");
model <- NULL;
wts <- 0.0;
if (inherits(modelFormulas,"list"))
{
listformulas <- modelFormulas;
modelFormulas <- character();
for (i in 1:length(listformulas))
{
modelFormulas <- append(modelFormulas,paste(listformulas[[i]],collapse = ' + '))
}
}
coefList <- NULL;
formulaList <- NULL;
wtsList <- NULL;
oF <- NULL;
wtsfit <- NULL;
outPred <- NULL;
if (length(modelFormulas) > 0)
{
if (modelFormulas[1] != "=-=End=-=")
{
iscompletef <- (gregexpr(pattern ='~',modelFormulas[1])[1] > 0)
if (iscompletef && is.null(Outcome))
{
varsmod <- all.vars(formula(modelFormulas[1]));
if (substr(modelFormulas[1],1,5)!="Surv(")
{
Outcome <- varsmod[1];
}
else
{
Outcome <- varsmod[2];
timeOutcome <- varsmod[1];
}
}
outcomes <- c(Outcome,timeOutcome);
theoutcome <- data[,Outcome];
binoutcome <- (length(table(theoutcome))==2) && (min(theoutcome)==0);
predtype="linear";
if (binoutcome) predtype="prob";
if ( (type=="LM") && (binoutcome==TRUE) )
{
data[,Outcome] = 2*theoutcome-1.0;
predtype="linear";
}
if (type!="COX")
{
baseForm <- paste(Outcome,"~ 1");
}
else
{
baseForm <- paste("Surv(",timeOutcome,",",Outcome,")~ 1");
}
frma <- baseForm;
Jaccard.SM <- 0;
coefEvolution <- NULL;
avgsize <- 0;
formulaNetwork <- NULL;
WformulaNetwork <- NULL;
coefList <- list();
formulaList <- list();
wtsList <- numeric();
coef_in <- 1;
# cat(length(modelFormulas)," :Entering orderFeatures\n");
oF <- orderFeatures(modelFormulas,univariate=NULL,baseForm);
VarFrequencyTable <- oF$VarFrequencyTable;
if (!is.null(VarFrequencyTable))
{
# print(VarFrequencyTable);
forder <- oF$forder;
theterms <- oF$theterms;
features <- oF$features;
modelFormulas <- oF$modelFormulas;
if (length(names(VarFrequencyTable)) > 0)
{
loops <- length(modelFormulas);
vnames <- rownames(VarFrequencyTable);
formulaNetwork <- matrix(0,nrow=length(VarFrequencyTable),ncol = length(VarFrequencyTable))
dimnames(formulaNetwork) <- list(names(VarFrequencyTable),names(VarFrequencyTable))
m <- formulaNetwork
WformulaNetwork <- formulaNetwork
Jaccard.SM <- 0;
tota <- 0;
for (n in 1:loops)
{
feat <- theterms[[n]];
lft <- length(feat);
m[,] <- 0;
if (lft>0)
{
m[feat,feat]=1;
if (n<loops)
{
for (i in (n+1):loops)
{
feat2 <- theterms[[i]];
if (length(feat2) > 0)
{
Jaccard.SM = Jaccard.SM+sum(duplicated(c(feat2,feat)))/length(unique(c(feat2,feat)));
tota = tota + 1;
}
}
}
}
formulaNetwork <- formulaNetwork+m
}
if (tota>1) Jaccard.SM = Jaccard.SM/tota;
fnrom <- loops;
if (oF$numberofBreaks>0) fnrom <- oF$numberofBreaks;
formulaNetwork <- round(formulaNetwork/fnrom,digits = 3);
# cat("Size :",nrow(data)," Features :",length(VarFrequencyTable))
frma <- baseForm;
bmodelsize <- 1;
avgsize <- 0;
coefEvolution <- NULL;
for ( i in 1:length(vnames))
{
if ((vnames[i] != " ") && (vnames[i] != ""))
{
frma <- paste(frma,"+",vnames[i]);
bmodelsize = bmodelsize + 1;
}
}
model <- modelFitting(frma,data,type=type,fitFRESA=TRUE);
if (bmodelsize>1)
{
thevars <- all.vars(formula(frma));
if (inherits(model, "try-error"))
{
# cat("Fitting Error\n");
warning(frma," Warning Bagging Fitting error\n")
}
else
{
msize <- length(model$coefficients)
basecoef <- abs(model$coefficients)+1e-6;
names(basecoef) <- names(model$coefficients);
if ((type=="COX") && !inherits(model,"fitFRESA"))
{
avgLogPvalues <- numeric(length(model$coefficients));
names(avgLogPvalues) <- names(model$coefficients);
}
else
{
avgLogPvalues <- numeric(length(model$coefficients)-1);
names(avgLogPvalues) <- names(model$coefficients)[-1];
}
addedZvalues <- avgLogPvalues;
addedCoef <- avgLogPvalues;
baggingAnalysis <- list();
baggingAnalysis$uMS_values <- avgLogPvalues;
baggingAnalysis$rMS_values <- avgLogPvalues;
baggingAnalysis$NeRI_values <- avgLogPvalues;
baggingAnalysis$pt_values <- avgLogPvalues;
baggingAnalysis$pWilcox_values <- avgLogPvalues;
baggingAnalysis$pF_values <- avgLogPvalues;
baggingAnalysis$pBin_values <- avgLogPvalues;
baggingAnalysis$mMSE_values <- avgLogPvalues;
baggingAnalysis$dMSE_values <- avgLogPvalues;
baggingAnalysis$uAcc_values <- avgLogPvalues;
baggingAnalysis$rAcc_values <- avgLogPvalues;
baggingAnalysis$uAUC_values <- avgLogPvalues;
baggingAnalysis$rAUC_values <- avgLogPvalues;
baggingAnalysis$dAUC_values <- avgLogPvalues;
baggingAnalysis$idi_values <- avgLogPvalues;
baggingAnalysis$nri_values <- avgLogPvalues;
baggingAnalysis$zidi_values <- avgLogPvalues;
baggingAnalysis$znri_values <- avgLogPvalues;
baggingAnalysis$mAUC_values <- avgLogPvalues;
baggingAnalysis$mACC_values <- avgLogPvalues;
baggingAnalysis$coefstd <- avgLogPvalues;
baggingAnalysis$coefficients <- avgLogPvalues;
baggingAnalysis$wts <- avgLogPvalues;
# print(basecoef);
avgsize <- msize-1;
mado <- NA;
rnames <- 0;
if ((msize > 1)&&(loops>1))
{
model$type=type;
onames <- names(model$coefficients);
mmult <- 1+1*(type=="COX");
model$estimations <- numeric(mmult*msize);
wts <- 0;
model$coefficients <- numeric(msize);
names(model$coefficients) <- onames;
modelmeans <- model$coefficients;
coefEvolution <- c(0,model$coefficients);
names(coefEvolution) <- c("Weight",names(model$coefficients));
# cat("\n");
tot_cycles <- 0;
# print(model$coefficients);
avgsize = 0;
preddata <- data[,outcomes];
predformula <- baseForm
prenames <- outcomes
onlypred <- NULL;
# print(predtype)
bestfit <- NULL
# bestformula <- baseForm
# allprednames <- NULL
fidx=1;
outPred <- list();
for (n in 1:loops)
{
if ((n %% 10) == 0) cat(".");
feat <- theterms[[n]]
if (length(feat)>0)
{
pname <- sprintf("V%d",n)
outPred[[fidx]] <- modelFitting(formula(modelFormulas[n]),data,type,fitFRESA=TRUE);
outPred[[fidx]]$model <- NULL
environment(outPred[[fidx]]$formula) <- globalenv();
environment(outPred[[fidx]]$terms) <- globalenv();
preddata <- cbind(preddata,predict.fitFRESA(outPred[[fidx]],data,"linear"));
prenames <- c(prenames,pname);
predformula <- paste(predformula," + ",pname,sep="")
onlypred <- c(onlypred,pname);
fidx <- fidx + 1
}
}
preddata <- as.data.frame(preddata)
colnames(preddata) <- prenames;
# wtsfit <- modelFitting(formula(predformula),preddata,type,fitFRESA=TRUE);
if (type=="LOGIT")
{
wtsfit <- LASSO_MIN(formula(predformula),preddata,family="binomial");
}
else
{
wtsfit <- LASSO_MIN(formula(predformula),preddata);
}
# print(wtsfit$coef)
attr(wtsfit,"baseForm") <- baseForm
attr(wtsfit,"outcomes") <- outcomes
attr(wtsfit,"prenames") <- prenames
attr(wtsfit,"predtype") <- predtype
environment(wtsfit$formula) <- globalenv();
# environment(wtsfit$terms) <- globalenv();
# print(summary(wtsfit)$coef)
if (!inherits(wtsfit, "try-error"))
{
# allwts <- wtsfit$coefficients[onlypred];
allwts <- wtsfit$coef[onlypred];
allwts[is.na(allwts)] <- 0
# print(allwts);
}
else
{
allwts <- rep(1.0,fidx-1);
}
names(allwts) <- onlypred
allwts[allwts < 1.0e-12] <- 1.0e-12
# print(allwts);
# print(predformula)
# print(colnames(preddata))
# wtsfit <- modelFitting(formula(bestformula),preddata,type,fitFRESA=TRUE);
# print(onlypred);
# allwts <- numeric(length(allprednames));
# names(allwts) <- allprednames
# allwts[onlypred] <- wtsfit$coefficients[onlypred];
# print(allwts);
fidx=1;
for (n in 1:loops)
{
if ((n %% 10) == 0) cat(".");
feat <- theterms[[n]]
avgsize = avgsize+length(feat);
if (length(feat)>0)
{
Rwts <- allwts[fidx]
# print(Rwts);
fidx <- fidx + 1;
out <- modelFitting(formula(modelFormulas[n]),data,type,fitFRESA=TRUE);
coef_Panalysis <- NULL;
if (!inherits(out, "try-error"))
{
osize <- length(out$coefficients)
if (osize > 1)
{
coefList[[coef_in]] <- out$coefficients;
formulaList[[coef_in]] <- modelFormulas[n];
coef_in <- coef_in + 1;
tot_cycles = tot_cycles + 1;
onames <- names(out$coefficients);
znames <- onames;
if ((type!="COX") || inherits(out,"fitFRESA")) znames <- onames[-1];
if (predtype=="linear")
{
gvar <- getVar.Res(out,data=data,Outcome=Outcome,type=type,testData=data)
coef_Panalysis <- -log10(gvar$FP.value);
}
else
{
gvar <- getVar.Bin(out,data=data,Outcome=Outcome,type=type,testData=data)
coef_Panalysis <- -log10(1.0-pnorm(gvar$z.IDIs));
}
infnum <- is.infinite(coef_Panalysis)
if (sum(infnum)>0)
{
# print(coef_Panalysis);
coef_Panalysis[coef_Panalysis == Inf] <- 100.0;
}
coef_Panalysis[coef_Panalysis > 100.0] <- 100.0;
if (predtype=="linear")
{
baggingAnalysis$uMS_values[znames] <- baggingAnalysis$uMS_values[znames] + Rwts*gvar$unitestMSE;
baggingAnalysis$rMS_values[znames] <- baggingAnalysis$rMS_values[znames] + Rwts*gvar$redtestMSE;
baggingAnalysis$NeRI_values[znames] <- baggingAnalysis$NeRI_values[znames] + Rwts*gvar$NeRIs;
baggingAnalysis$pF_values[znames] <- baggingAnalysis$pF_values[znames] + Rwts*log(gvar$FP.value);
baggingAnalysis$pt_values[znames] <- baggingAnalysis$pt_values[znames] + Rwts*log(gvar$tP.value);
baggingAnalysis$pBin_values[znames] <- baggingAnalysis$pBin_values[znames] + Rwts*log(gvar$BinP.value);
baggingAnalysis$pWilcox_values[znames] <- baggingAnalysis$pWilcox_values[znames] +
Rwts*log(gvar$WilcoxP.value);
baggingAnalysis$mMSE_values[znames] <- baggingAnalysis$mMSE_values[znames] + Rwts*gvar$FullTestMSE;
baggingAnalysis$dMSE_values[znames] <- (baggingAnalysis$dMSE_values[znames] +
Rwts*(gvar$redtestMSE - gvar$FullTestMSE) );
}
else
{
baggingAnalysis$uAcc_values[znames] <- baggingAnalysis$uAcc_values[znames] + Rwts*gvar$uniTestAccuracy;
baggingAnalysis$rAcc_values[znames] <- baggingAnalysis$rAcc_values[znames] + Rwts*gvar$redtestAccuracy;
baggingAnalysis$uAUC_values[znames] <- baggingAnalysis$uAUC_values[znames] + Rwts*gvar$uniTestAUC;
baggingAnalysis$rAUC_values[znames] <- baggingAnalysis$rAUC_values[znames] + Rwts*gvar$redtestAUC;
baggingAnalysis$idi_values[znames] <- baggingAnalysis$idi_values[znames] + Rwts*gvar$IDIs;
baggingAnalysis$nri_values[znames] <- baggingAnalysis$nri_values[znames] + Rwts*gvar$NRIs;
baggingAnalysis$zidi_values[znames] <- baggingAnalysis$zidi_values[znames] + Rwts*gvar$z.IDIs;
baggingAnalysis$znri_values[znames] <- baggingAnalysis$znri_values[znames] + Rwts*gvar$z.NRIs;
baggingAnalysis$mAUC_values[znames] <- baggingAnalysis$mAUC_values[znames] + Rwts*gvar$fullTestAUC;
baggingAnalysis$dAUC_values[znames] <- (baggingAnalysis$dAUC_values[znames] +
Rwts*(gvar$fullTestAUC-gvar$redtestAUC) );
baggingAnalysis$mACC_values[znames] <- baggingAnalysis$mACC_values[znames] + Rwts*gvar$fullTestAccuracy;
}
avgLogPvalues[znames] <- avgLogPvalues[znames] + coef_Panalysis;
rnames <- append(rnames,tot_cycles)
outmeans <- out$coefficients;
wts = wts + Rwts;
model$coefficients[onames] <- model$coefficients[onames] + Rwts*out$coefficients[onames];
baggingAnalysis$coefficients[znames] <- baggingAnalysis$coefficients[znames] + Rwts*out$coefficients[znames];
baggingAnalysis$coefstd[znames] <- baggingAnalysis$coefstd[znames] + Rwts*(out$coefficients[znames]^2);
baggingAnalysis$wts[znames] <- baggingAnalysis$wts[znames] + rep(Rwts,length(znames));
coefEvolution <- rbind(coefEvolution,c(Rwts,model$coefficients/wts));
# print(model$coefficients/wts);
addedZvalues[znames] <- addedZvalues[znames] + rep(Rwts,length(znames));
addedCoef[znames] <- addedCoef[znames] + rep(1,length(znames));
if (type=="COX")
{
fullmodelmeans <- abs(modelmeans[onames]);
names(fullmodelmeans) <- onames
for (ei in 1:osize)
{
outmeans[ei] <- out$estimations[osize+ei];
}
for (ei in onames)
{
if (fullmodelmeans[ei]>0)
{
modelmeans[ei] <- 0.5*(modelmeans[ei] + outmeans[ei]);
}
else
{
modelmeans[ei] <- outmeans[ei];
}
}
}
# print(model$coefficients)
}
}
else
{
cat("+");
# print(out$coef);
}
}
}
avgsize = avgsize/loops;
if( wts > 0)
{
conames <- names(model$coefficients);
model$coefficients <- model$coefficients/wts;
names(model$coefficients) <- conames;
# print(model$coefficients);
baggingAnalysis$coefficients <- baggingAnalysis$coefficients/baggingAnalysis$wts;
# gain <- model$coefficients[names(baggingAnalysis$coefficients)]/baggingAnalysis$coefficients;
# baggingAnalysis$coefstd <- gain*sqrt(abs(baggingAnalysis$coefstd/baggingAnalysis$wts-(baggingAnalysis$coefficients)^2));
# baggingAnalysis$coefficients <- gain*baggingAnalysis$coefficients;
coefEvolution <- as.data.frame(coefEvolution);
rownames(coefEvolution) <- rnames
if (type == "COX")
{
model$estimations <- c(model$coefficients,modelmeans);
}
else
{
model$estimations <- model$coefficients;
}
avgLogPvalues <- avgLogPvalues/addedCoef;
baggingAnalysis$formula.list <- modelFormulas;
baggingAnalysis$uMS_values <- baggingAnalysis$uMS_values/addedZvalues;
baggingAnalysis$rMS_values <- baggingAnalysis$rMS_values/addedZvalues;
baggingAnalysis$NeRI_values <- baggingAnalysis$NeRI_values/addedZvalues;
baggingAnalysis$pt_values <- exp(baggingAnalysis$pt_values/addedZvalues);
baggingAnalysis$pWilcox_values <- exp(baggingAnalysis$pWilcox_values/addedZvalues);
baggingAnalysis$pF_values <- exp(baggingAnalysis$pF_values/addedZvalues);
baggingAnalysis$pBin_values <- exp(baggingAnalysis$pBin_values/addedZvalues);
baggingAnalysis$uAcc_values <- baggingAnalysis$uAcc_values/addedZvalues;
baggingAnalysis$rAcc_values <- baggingAnalysis$rAcc_values/addedZvalues;
baggingAnalysis$uAUC_values <- baggingAnalysis$uAUC_values/addedZvalues;
baggingAnalysis$rAUC_values <- baggingAnalysis$rAUC_values/addedZvalues;
baggingAnalysis$idi_values <- baggingAnalysis$idi_values/addedZvalues;
baggingAnalysis$nri_values <- baggingAnalysis$nri_values/addedZvalues;
baggingAnalysis$zidi_values <- baggingAnalysis$zidi_values/addedZvalues;
baggingAnalysis$znri_values <- baggingAnalysis$znri_values/addedZvalues;
baggingAnalysis$mAUC_values <- baggingAnalysis$mAUC_values/addedZvalues;
baggingAnalysis$dAUC_values <- baggingAnalysis$dAUC_values/addedZvalues;
baggingAnalysis$mACC_values <- baggingAnalysis$mACC_values/addedZvalues;
baggingAnalysis$mMSE_values <- baggingAnalysis$mMSE_values/addedZvalues;
baggingAnalysis$dMSE_values <- baggingAnalysis$dMSE_values/addedZvalues;
baggingAnalysis$wts <- baggingAnalysis$wts/addedCoef;
baggingAnalysis$RelativeFrequency <- VarFrequencyTable/fnrom;
baggingAnalysis$Jaccard.SM <- Jaccard.SM;
baggingAnalysis$coeff_n_samples <- addedCoef;
baggingAnalysis$observations <- observations;
baggingAnalysis$avgLogPvalues <- avgLogPvalues;
# model$linear.predictors <- predict(model);
model$linear.predictors <- predict.fitFRESA(out,data,"linear");
model$formula.List <- formulaList;
# model$coefficients.List <- coefList;
model$wts.List <- wtsList;
model$fraction <- 1.0/nrow(data);
model$baggingAnalysis <- baggingAnalysis;
WformulaNetwork <- sqrt(WformulaNetwork);
diag(WformulaNetwork) <- 0;
WformulaNetwork <- WformulaNetwork/max(WformulaNetwork);
}
else
{
model <- modelFitting(formula(frma),data,type=type,fitFRESA=TRUE)
# print(model$coefficients)
model$coefficients[is.nan(model$coefficients)] <- 0.0;
model$coefficients[is.na(model$coefficients)] <- 0.0;
model$estimations[is.nan(model$estimations)] <- 0.0;
model$estimations[is.na(model$estimations)] <- 0.0;
}
}
else
{
model <- modelFitting(formula(frma),data,type=type,fitFRESA=TRUE)
model$coefficients[is.nan(model$coefficients)] <- 0.0;
model$coefficients[is.na(model$coefficients)] <- 0.0;
model$estimations[is.nan(model$estimations)] <- 0.0;
model$estimations[is.na(model$estimations)] <- 0.0;
}
}
}
}
else
{
model <- modelFitting(formula(frma),data,type=type,fitFRESA=TRUE);
}
}
else
{
model <- modelFitting(formula(frma),data,type=type,fitFRESA=TRUE);
}
# print(model$coefficients);
# model$model <- NULL
environment(model$formula) <- globalenv();
environment(model$terms) <- globalenv();
}
else
{
warning("No Formulas\n");
model <- NULL;
frma <- NULL;
VarFrequencyTable <- NULL;
avgsize <- 0;
formulaNetwork <- NULL;
Jaccard.SM <- 0;
coefEvolution <- NULL;
}
}
else
{
warning("No Formulas\n");
model <- NULL;
frma <- NULL;
VarFrequencyTable <- NULL;
avgsize <- 0;
formulaNetwork <- NULL;
Jaccard.SM <- 0;
coefEvolution <- NULL;
}
result <- list(bagged.model=model,
formula=frma,
frequencyTable=VarFrequencyTable,
averageSize=avgsize,
formulaNetwork=formulaNetwork,
WformulaNetwork=WformulaNetwork,
Jaccard.SM = Jaccard.SM,
coefEvolution=coefEvolution,
featureLocation=forder,
predfit = wtsfit,
outPred = outPred
);
class(result) <- c("BAGGS");
return (result);
}
#' @method predict BAGGS
predict.BAGGS <-
function (object,...)
{
if (!is.null(object$predfit))
{
out <- NULL
testData <- NULL
impute=FALSE;
parameters <- list(...);
testData <- parameters[[1]];
preddata <- testData[,attr(object$predfit,"outcomes")];
for (ffit in object$outPred)
{
preddata <- cbind(preddata,predict.fitFRESA(ffit,testData,"linear"));
}
# print(colnames(preddata))
preddata <- as.data.frame(preddata)
colnames(preddata) <- attr(object$predfit,"prenames");
# print(colnames(preddata))
# cat(class(object$predfit))
out <- predict(object$predfit,preddata)
if (attr(object$predfit,"predtype")=="prob")
{
out <- 1.0/(1.0+exp(-out));
}
}
else
{
out <- predict(object$bagged.model,...)
}
return (out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.