##This is the "main" function
##The parameter method determines the type of regression method used. "ols"= least squares regression,
## "quant" for quantile regression, "irls" for IRLS (Iterated Reweighted Least Square) regression,
## "lasso" for LASSO (Least Absolute Shrinkage and Selection Operator) regression and for "ridge"
## for L2 regularization regression.
MetaTun <-function(optpar=Inf,fvalueopt=Inf,Paramdata,instancesampled,order,budget,method="ols",tau=0.5,typeofresult="mean",ncores=1,packuser=NA,funcuser=NA){
###################Used only with simulated models########################
if (fvalueopt!=Inf) {
dist_eucli <- data.frame(exec=numeric(0),iter=numeric(0),dist=numeric(0))
gapt_opt <- data.frame(exec=numeric(0),gap=numeric(0))
}
dist <- Inf
##########################################################################
#set.seed(1234)
Checkpackages()
##################################################Starting parallel environment######################################################
library(doParallel)
maxcoresmachine <- detectCores()-1
if (is.character(ncores))
if (ncores=="MAX") no_cores <-maxcoresmachine
else stop("Error! The unique character value accepted to ncores is 'MAX'")
else
if (ncores%%1!=0) stop("Error! The number of cores must be an integer value >=1 or equal to 'MAX'")
else
if (ncores<1) stop("The value of ncores must be an integer >=1")
else
if (maxcoresmachine<ncores) no_cores <- maxcoresmachine
else no_cores <- ncores
cat("\n")
cat("Number of cores used:")
cat(no_cores)
cat("\n")
if ((is.numeric(ncores))&&(ncores%%1==0)&&(no_cores<ncores)){
warning("The number of cores specified is greater than number-of-cores-in-the-machine-1. Using number-of-cores-in-the-machine-1")
}
if (no_cores==1) warning("Using only 1 core. This is equivalent to a sequential processing")
################################################registering the cluster of cores#########################################################
cl <- makeCluster(no_cores,type="PSOCK")
registerDoParallel(cl)
clusterExport(cl,c("functuning")) ##########exporting "functuning" to the clusters#########
if (!is.na(funcuser[1])){ ##########exporting other functions to the clusters######
for (i in 1:length(funcuser)){
namefunc <- funcuser[i]
clusterExport(cl,c(as.character(namefunc)))
}
}
if (!is.na(packuser[1])) { ############exporting not native R packages##############
for (i in 1:length(packuser)) {
namepackage <- packuser[i]
clusterExport(cl,c("namepackage"),envir=environment())
clusterEvalQ(cl, library(namepackage,character.only=TRUE))
}
}
#########################################################################################################################################
##filenamepoints <- paste("points.csv",sep="") ##arquivo de log dos candidatos
nextid <-0
if (length(instancesampled)<5){
stop("Error!! MetaTun needs that the training instances set has at least 5 instances")
}
instancesvalues <- instancesampled
instances <- sample(length(instancesvalues),length(instancesvalues))
####This code refers to the situation when is accepted repetition of instances
# if (length(instancesvalues)<100) {
# instances <- sample(length(instancesvalues),length(instancesvalues))
# instances <- c(instances,sample(length(instancesvalues),100-(length(instances)),replace=TRUE))
# }
# else instances <- sample(length(instancesvalues),length(instancesvalues))
print("Order of using instances (according to Instances IDs)")
print(instances)
datatuning <- ReadParametersData(Paramdata)
initialcandidates <- (choose((length(datatuning$name)+order),order))*3
compbudget <- budget
if ((typeofresult!="mean") & (typeofresult!="sumrankings") & (typeofresult!="meanrankings")){
stop("Error!! The parameter 'typeofresult' must be equal to 'mean' or 'sumrankings' or 'meanrankings'")
}
if ((initialcandidates*5)>budget){
stop("Error!! The computational budget is not enough to perform the evaluation of initial candidates")
}
if ((method!="ols") & (method!="quant") & (method!="lasso") & (method!="ridge")){
stop("Error!! The regression method must be: \"ols\", \"irls\", \"quant\", \"lasso\", or \"ridge\" ")
}
firstcandidate <- 1
firstinstance <- 1
lastinstance <- 5
instancespertest <- 1
nbcandidates <- ceiling(initialcandidates*.2)
nbeliminated <- nbcandidates ####number of candidates eliminated at each iteration
eliminated <- numeric(0) ####vector of candidate IDs eliminated after each iteration
it <- 1
successful <- 1
while(TRUE) {
cat("\n")
cat("Iteration nro. ")
cat(it)
cat("\n")
if (it==1){
validmodel <- FALSE
ntrylhs <- 1
while((!validmodel)&&(compbudget>=(initialcandidates*lastinstance))){
allcandidates <- matrix(ncol=(length(datatuning$name)+1),nrow=0)
allcandidates<-GenInitialCandidates(initialcandidates,datatuning,allcandidates)
Results <- matrix(ncol=0, nrow=0)
cat("\n")
cat("Evaluating Initial candidates...")
cat("\n")
Results <- EvaluateCandidates(Results,allcandidates,instances,instancesvalues,c(firstcandidate:allcandidates[nrow(allcandidates),ncol(allcandidates)]),firstinstance,lastinstance)
#print(Results)
Resultsnorm <- NormalizeData(Results)
cat("\n")
cat("Evaluation of candidates after scaling results in an [0,1] scale")
cat("\n")
print(Resultsnorm)
cat("\n")
cat("\n")
cat("Regression Modeling...")
cat("\n")
model<-RegModel(Resultsnorm,allcandidates,datatuning,typeofresult,order,method,tau,weights=NA)
cat("...Done!")
##I am using summary(model, se="ker") instead of other methods when obtaining
##coefficients standard errors in "quant" regression.
##This method achieved the best performance considering confidence intervals X
##runtime.
#About a warning when using "nid" method in summary of quantile regression (rq function)
#for estimating standard errors.
#This explanation was found on rq FAQ. http://www.econ.uiuc.edu/~roger/research/rq/FAQ
#"What does the message "Non-positive fis" mean?
# When method ="nid" is used in summary local density estimates are made at
# each x_i value, in some cases these estimates can be negative and if so
# they are set to zero. This is generally harmless, leading to a somewhat
# conservative (larger) estimate of the standard errors, however if the
# reported number of non-positive fis is large relative to the sample size
# then it is an indication of misspecification of the model.
if (method=="quant") modelsum <- summary(model, se="ker") #modelsum <- summary(model, se="boot", bsmethod="wxy", R=500)
else if (method=="ols") modelsum <- summary(model)
else modelsum <- model
if (is.element(method,c("ols","quant"))) {
modelsum <- PruneModel(modelsum,method)
##verifying if a model is a valid model
if ((length(modelsum[[2]])>1) || ((length(modelsum[[2]])==1)&(modelsum[[2]][1]!="(Intercept)")) )
validmodel <- TRUE
}
else if (method=="ridge") validmodel <- TRUE
else if (!is.na(modelsum[[1]])) validmodel <- TRUE
#if (((length(which(modelsum[[4]]==0)))<((length(modelsum[[4]]))-1)) || (((length(which(modelsum[[4]]==0)))==((length(modelsum[[4]]))-1))&&(modelsum[[4]][1]==0))) validmodel <- TRUE
###if model is lasso, it is a valid model if at least one term different of intercept is different from zero
##print("modelo após validação - iteração 1")
##print(modelsum)
compbudget <- budget -((lastinstance*ncol(Resultsnorm))*ntrylhs)
ntrylhs <- ntrylhs + 1
}
if (validmodel==FALSE) {
print("It was impossible to generate a first valid regression model using the computational budget provided and the regression modeling strategy. Try another kind of regression modeling.")
successful <- 0
break
}
else {
cat("\n")
cat("Initial Candidates")
cat("\n")
print(allcandidates)
}
}
else {
budgetneeded <- (initialcandidates*instancespertest)
if ((lastinstance<=length(instancesvalues))&&(compbudget>=budgetneeded)){
cat("\n")
cat("Evaluating all candidates on new instances...")
cat("\n")
Results <- EvaluateCandidates(Results,allcandidates,instances,instancesvalues,setdiff((allcandidates[1,ncol(allcandidates)]:allcandidates[nrow(allcandidates),ncol(allcandidates)]),eliminated),firstinstance,lastinstance)
#print("Evaluation of candidates before scaling results in a [0,1] scale")
#print(Results)
Resultsnorm <- NormalizeData(Results)
#Resultsnormchecked <- CheckPseudoreplication(Resultsnorm,instances,firstinstance,lastinstance)
cat("\n")
cat("Evaluation of candidates after scaling results in an [0,1] scale")
cat("\n")
print(Resultsnorm)
compbudget <- compbudget - budgetneeded
cat("\n")
cat("------------------------------------------Computational Budget remaining-----------------------------------------")
cat("\n")
cat(compbudget)
cat(" Experiments")
}
cat("\n")
cat("\n")
cat("Regression Modeling...")
cat("\n")
weights <- GetWeights(Resultsnorm)
model<-RegModel(Resultsnorm,allcandidates,datatuning,typeofresult,order,method,tau,weights)
cat("...Done.")
if (method=="quant") modelsum <- summary(model, se="ker") #modelsum <- summary(model, se="boot", bsmethod="wxy", R=500)
else if (method=="ols") modelsum <- summary(model)
else modelsum <- model
###if model is lasso, it is a valid model if at least one term different of intercept is different from zero
###In the case of not getting non-zero coefficients, the method have to be terminated.
if (method=="lasso")
#if (((length(which(modelsum[[4]]==0)))==(length(modelsum[[4]]))) || (((length(which(modelsum[[4]]==0)))==((length(modelsum[[4]]))-1))&&(modelsum[[4]][1]!=0))) {
if (is.na(modelsum[[1]])){
print("The lasso modeling generated a model with all zero coefficients or only the intercept non-zero.")
print("Maybe the data are somewhat bad behaved to use lasso. Please try to use the other options of regression modeling")
break
}
}
cat("\n")
cat("Original model obtained...")
cat("\n")
cat("Generating derivated models...")
cat("\n")
if (is.element(method,c("ols","quant")))
if (it==1) models<-GenerateModels(modelsum[[1]],nbcandidates,method)
else models <- GenerateModels(modelsum$coefficients[,1:2],nbcandidates,method)
else models<-GenerateModels(modelsum[3:4],nbcandidates,method)
cat("...Done!")
#cat("\n")
#cat("Original Model and derivated models")
cat("\n")
#print(models)
newcandidates <- matrix(ncol=(length(datatuning$name)+1),nrow=0)
if (is.element(method,c("ols","quant")))
if (it==1) modeldescrip <- modelsum[[2]]
else modeldescrip <- rownames(modelsum$coefficients)
else modeldescrip <- modelsum[1:2]
funcorigin <- BuildFunction(modeldescrip,datatuning,models[[1]],method)
equationsforopt <- rep(funcorigin,length(models))
for (l in 2:length(models)) {
funcmodel <- BuildFunction(modeldescrip,datatuning,models[[l]],method)
equationsforopt[l]<-funcmodel
}
####if the amount of instances required until the next iteration exceeds the total of instances available, it will be sampled one to one at each
####new iteration
if ((lastinstance+instancespertest)>length(instancesvalues)) instancespertest <- 1
if (lastinstance>length(instancesvalues)) budgetneeded <- (nbcandidates*length(instancesvalues)) else budgetneeded <- (nbcandidates*lastinstance)
##old version, when candidates were not eliminated of evaluation phase and was allowed repetition of instances
#budgetneeded <- ((nbcandidates*lastinstance) + ((nbcandidates+ncol(Resultsnorm))*instancespertest))
if (compbudget>=budgetneeded) {
cat("\n")
cat("Optimizing Models and generating new candidates...")
cat("\n")
newcandidates <-as.matrix(Optimize(equationsforopt,datatuning))
cat("...Done!")
nextid<-initialcandidates +(it-1)*nbcandidates + 1
firstcandidate <- nextid
newcandidates[,(ncol(newcandidates))] <- c((nextid):(nextid+nrow(newcandidates)-1))
allcandidates <- rbind(allcandidates,newcandidates)
cat("\n")
cat("all candidates after generating new candidates by Optimization")
cat("\n")
print(allcandidates)
cat("\n")
cat("Evaluating the new candidates on all instances sampled...")
cat("\n")
cat("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX-----------------------------XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
cat("\n")
if (lastinstance>length(instancesvalues)) lastinstance <- length(instancesvalues) ####forces lastinstance to be the last available instance, if it is greater
Results <- EvaluateCandidates(Results,allcandidates,instances,instancesvalues,c(firstcandidate:allcandidates[nrow(allcandidates),ncol(allcandidates)]),1,lastinstance)
#print(Results)
Resultsnorm <- NormalizeData(Results)
eliminated <- c(eliminated,EliminateCandidates(allcandidates,Results,nbeliminated,eliminated))
cat("\n")
cat("Candidates not evaluated anymore")
cat("\n")
print(eliminated)
compbudget <- compbudget - budgetneeded
cat("\n")
cat("------------------------------------------Computational Budget remaining-----------------------------------------")
cat("\n")
cat(compbudget)
cat(" Experiments")
firstinstance <- lastinstance+1
lastinstance <- firstinstance+instancespertest-1
}
else {
cat("\n")
cat("The computational budget remaining is not sufficient to perform another iteration")
cat("\n")
cat("The method will be finished")
cat("\n")
Resultsnorm <- NormalizeData(Results)
#Resultsnormchecked <- CheckPseudoreplication(Resultsnorm,instances,firstinstance,lastinstance)
bestiter <- ReturnElite(Resultsnorm,1,allcandidates)
#######Only used in the case of simulated algorithm########
if (fvalueopt!=Inf) {
dists <- numeric(nrow(allcandidates))
for (l in 1:(nrow(allcandidates))) dists[l] <- DistEucli(allcandidates[l,1:((ncol(allcandidates))-1)],optpar)
if (min(dists)<dist) {
dist <- min(dists)
closestcand <- allcandidates[(order(dists)[1]),1:((ncol(allcandidates))-1)]
}
dist_eucli[nrow(dist_eucli)+1,] <- c(NA,it,dist)
}
###########################################################
break
}
bestiter <- ReturnElite(Resultsnorm,1,allcandidates)
#######Only used in the case of simulated algorithm########
if (fvalueopt!=Inf) {
dists <- numeric(nrow(allcandidates))
for (l in 1:(nrow(allcandidates))) dists[l] <- DistEucli(allcandidates[l,1:((ncol(allcandidates))-1)],optpar)
if (min(dists)<dist) {
dist <- min(dists)
closestcand <- allcandidates[(order(dists)[1]),1:((ncol(allcandidates))-1)]
}
dist_eucli[nrow(dist_eucli)+1,] <- c(NA,it,dist)
}
###########################################################
it <- it + 1
}
stopCluster(cl)
if (successful) {
#readline(prompt="Press [enter] to continue")
e<-ReturnElite(Resultsnorm,3,allcandidates)
best_par <- e[1,1:(ncol(e)-1)]
colnames(e) <- c(as.vector(datatuning$name),"Id")
best_par <- cbind(best_par)
colnames(best_par)[ncol(best_par)] <- c("exec")
best_par["exec"] <- NA
best_par <-as.data.frame(t(best_par))
#######Only used in the case of simulated algorithm########
if (fvalueopt!=Inf) {
valuefbest <- functuning(closestcand,0,exact=1)
if (fvalueopt==0) gap <- abs(valuefbest-fvalueopt)
else gap <- (abs(valuefbest-fvalueopt)/abs(fvalueopt))
gapt_opt[1,] <- c(NA,gap)
}
############################################################
cat("\n")
cat("Final model obtained")
cat("\n")
cat("-====================================================================================================================================-")
cat("\n")
print(models[[1]])
cat("-====================================================================================================================================-")
cat("\n")
cat("\n")
cat("Best Candidates")
cat("\n")
cat("-====================================================================================================================================-")
cat("\n")
print(e)
cat("-====================================================================================================================================-")
cat("\n")
cat("\n")
cat("-====================================================================================================================================-")
cat("\n")
cat("\n")
cat("The structure of parameters to be tuned. Describing the names, ranges, values(in case of categorical ones) and the type of parameters\n")
cat("-====================================================================================================================================-")
cat("\n")
print(datatuning)
cat("-====================================================================================================================================-")
cat("\n")
cat("\n")
cat("Returning the best parameter values the last model")
cat("\n")
if(fvalueopt==Inf) return(list(best_par, funcorigin))
else return((list(best_par,dist_eucli,gapt_opt,closestcand,funcorigin))) ###used in case of simulated algorithms
}
else {
print("Probably the model is not suitable to the data, or the data are somewhat bad behaved")
print("Try to use a higher computacional budget")
return(list(NA,NA,NA,NA,NA))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.