# The MIT License (MIT)
#
# Copyright (c) 2015-2016 Fabio Gabriel
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Metrics
#
.logLoss <- function(act, pred, eps=1e-15) {
nr <- length(pred)
pred <- pmax(pred, eps)
pred <- pmin(pred, 1-eps)
ll <- sum(act*log(pred) + (1-act)*log(1-pred))
ll <- ll * (-1/nr)
return (ll)
}
.mae <- function(actual, predicted) {
sum(abs(actual - predicted)) / length(actual)
}
.rmse <- function(y, y_hat) {
sqrt(mean((y-y_hat)^2))
}
.r2 <- function(y, y_hat) {
ss_res <- sum((y-y_hat)^2)
mu_y <- mean(y)
ss_tot <- sum((y-mu_y)^2)
1 - ss_res / ss_tot
}
VALID_METRICS <- c('logLoss', 'mae', 'rmse', 'r2')
.getMetric <- function(metricName) {
switch(metricName,
logLoss = .logLoss,
mae = .mae,
rmse = .rmse,
r2 = .r2,
metricName)
}
partitionData <- function(traindf, target) {
inTraining <- caret::createDataPartition(traindf[[target]], p=.75, list=F)
training <- traindf[inTraining, ]
testing <- traindf[-inTraining, ]
labels <- testing[[target]]
testing[[target]] <- NULL
return(list(train=training, test=testing, labels=labels))
}
#' create train control
#'
#' Creates a train control to be used in model validation.
#' @param metric The evaluation metric. This can be an user defined function. Defaults to LogLoss.
#' @param n The number of validation rounds. Defaults to 3.
#' @param method The sampling method. Defaults to repeated (Monte-Carlo) sampling.
createControl <- function(metric='logLoss', n=3, method="repeated") {
# TODO. Implement the remaining strategies.
if (method != "repeated") {
stop("repeated is the only implemented sampling approach")
}
if (!(method %in% c("repeated", "bootstrap", "cv"))) {
stop("Invalid validation method: ", method)
}
if (class(metric) == "character") {
if (!(metric %in% VALID_METRICS)) {
stop("Invalid metric: ", metric)
}
metric <- .getMetric(metric)
} else {
stop("Invalid metric")
}
list(metric=metric, n=n, method="repeated")
}
.performTunningRounds <- function(traindf, methodName, params, target, metric, n, verbose, ...) {
scoreSum <- 0
method <- get(methodName)
for (i in 1:n) {
l <- partitionData(traindf, target)
preds <- method(l$train, l$test, params, ...)
scoreSum <- scoreSum + metric(l$label, preds)
}
score <- scoreSum/n
return(score)
}
displayFmtLn <- function(...) {
cat(sprintf(...), "\n")
}
displayFmt <- function(...) {
cat(sprintf(...))
}
.dfToStr <- function(dataf) {
buffer <- ""
for (n in names(dataf)) {
buffer <- sprintf("%s%s=%s", buffer, n, dataf[,n])
buffer <- sprintf("%s, ", buffer)
}
substr(buffer, 1, nchar(buffer) - 2)
}
tuneModelParameters <- function(traindf, method, paramGrid, target, control, verbose=F, ...) {
scores <- c()
metric <- control$metric
n <- control$n
display <- displayFmt
displayLn <- displayFmtLn
if (!verbose) {
display <- function(...) {}
displayLn <- function(...) {}
}
displayLn('Tunning %s', method)
if (!(target %in% names(traindf))) {
stop("Not a valid label: ", target)
}
for (i in 1:nrow(paramGrid)) {
paramRow <- paramGrid[i, ]
displayLn("Iteration %d of %d", i, nrow(paramGrid))
tm <- proc.time()
newScore <- .performTunningRounds(traindf, method, paramRow, target, metric, n, verbose=verbose, ...)
displayLn("%s (%s): %f", method, .dfToStr(paramRow), newScore)
displayLn("Elapsed time: %f", proc.time()[[3]] - tm[[3]])
scores <- c(scores, newScore)
}
return(cbind(paramGrid, scores))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.