## R/mlearning.R

response <- function (object, ...)
	UseMethod("response")
	
response.default <- function (object, ...)
	attr(object, "response")
	
train <- function (object, ...)
	UseMethod("train")
	
train.default <- function (object, ...)
	attr(object, "train")

## TODO: test performance of the optimized code for Class ~ .
mlearning <- function (formula, data, method, model.args, call = match.call(),
..., subset, na.action = na.fail)
{
	## Our own construction of response vector and terms matrix
	if (missing(model.args))
		model.args <- list(formula  = formula, data = substitute(data),
			subset = substitute(subset))
	
	## Get data and initial number of cases
	data <- eval.parent(model.args$data)
	nobs <- NROW(data)
	
	## Special case for a formula like response ~ ., which speeds up
	## calculations and uses less memory than model.frame()
	isSimpleFormula <- function (formula) {
		vars <- all.vars(formula)
		(length(vars) == 2 && vars[2] == ".") || # Supervised (response ~ .)
		(length(vars) == 1 && vars[1] == ".")	 # Unsupervised (~ .)
	}	
	optim <- isSimpleFormula(model.args$formula)
	if (optim) {
		## data do not need to be changed... except for subset or na.action
		if (model.args$subset != "")
			data <- data[eval.parent(model.args$subset), ] 
		if (missing(na.action) || as.character(na.action) == "") {
			## Use same rules as model.frame():
			## (1) any na.action attribute of data
			na.action <- attr(data, "na.action")
			## (2) option na.action, or (3) na.fail
			if (is.null(na.action))
				na.action <- getOption("na.action", na.fail)
		}
		## Apply provided na.action
		data <- match.fun(na.action)(data)
		if (is.function(na.action)) na.action <- substitute(na.action)
		na.action <- as.character(na.action)
		model.terms <- terms(formula, data = data[1, ])
		attr(data, "terms") <- model.terms
	} else { # Use model.frame()
		if (missing(na.action) || as.character(na.action) == "") {
			data <- do.call("model.frame", model.args)
			na.action <- as.character(attr(data, "na.action"))
			if (!length(na.action)) {
				na.action <- "na.pass" # If not provided, either pass, or no NAs!
			} else na.action <- paste("na", class(na.action), sep = ".")
		} else {
			model.args$na.action <- na.action
			data <- do.call("model.frame", model.args)
			if (is.function(na.action)) na.action <- substitute(na.action)
			na.action <- as.character(na.action)
		}
		model.terms <- attr(data, "terms")
	}
	## Final number of observations
	nobs[2] <- NROW(data)
	names(nobs) <- c("initial", "final")
	
	## Construct the matrix of numeric predictors and the response
	term.labels <- attr(model.terms, "term.labels")
	response.pos <- attr(model.terms, "response")
	if (!response.pos) {
		response.label <- NULL
		train <- data
		response <- NULL
		lev <- NULL
		type <- "unsupervised"
	} else { # Supervised classification or regression
		response.label <- deparse(attr(model.terms, "variables")
			[[response.pos + 1]])
		response <- data[, response.label]
		if (is.factor(response)) {
			lev <- levels(response)
			response <- droplevels(response)
			type <- "classification"
		} else {
			if (!is.numeric(response))
				stop("response variable must be factor or numeric")
			lev <- NULL
			type <- "regression"
		}
		train <- data[, term.labels]
	}
	
	## Calculate weights
	w <- model.weights(data)
	if (length(w) == 0L) w <- rep(1, nrow(train))
	
	## Pass special arguments to the default method
	args <- list()
	args$formula <- formula
	args$levels <- lev
	args$n <- nobs
	args$weights <- w
	args$optim <- optim
	args$type <- type
	args$na.action <- substitute(na.action)
	args$mlearning.call <- call
	args$method <- method
	
	## Construct the mlearning object
	match.fun(method)(train = train, response = response, .args. = args, ...)
}
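
## Usage sketch (not run; assumes the standard 'iris' dataset): mlearning()
## is normally called indirectly, through one of the wrapper functions
## defined below.
# m <- mlLda(Species ~ ., data = iris)
# summary(m)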

print.mlearning <- function (x, ...)
{
	cat("A mlearning object of class ", class(x)[1], " (",
		attr(x, "algorithm"), "):\n", sep = "")
	type <- attr(x, "type")
	switch(type,
		regression = cat("[regression variant]\n"),
		unsupervised = cat("[unsupervised classification variant]\n"))
	cat("Call: ", deparse(attr(x, "mlearning.call")),
		"\n", sep = "")
	
	if (type == "classification") {
		## Number of cases used
		n <- attr(x, "n")
		if (n["final"] < n["initial"]) {
			msg <- paste("Trained using", n["final"], "out of",
				n["initial"], "cases:")
		} else msg <- paste("Trained using", n["final"], "cases:")
	
		## Categories
		classes <- attr(x, "response")
		levUsed <- levels(classes)
		levIni <- levels(x)
		if (length(levUsed) < length(levIni)) {
			cat("Levels with no cases in the training set that were eliminated:\n")
			print(levIni[!levIni %in% levUsed])
		}

		## Number of cases per used categories
		print(table(classes, dnn = msg))
	}
	return(invisible(x))
}

summary.mlearning <- function (object, ...)
{
	train <- attr(object, "train")
	response <- attr(object, "response")
	mlearning.class <- class(object)[1]
	class(object) <- class(object)[-(1:2)]
	## summary() is sometimes implemented as print() for some machine
	## learning algorithms... the function to use is given by the 'summary'
	## attribute
	sumfun <- attr(object, "summary")
	if (length(sumfun)) {
		res <- match.fun(sumfun)(object, ...)
	} else res <- object
	class(res) <- c("summary.mlearning", class(res))
	attr(res, "mlearning.class") <- mlearning.class
	attr(res, "algorithm") <- attr(object, "algorithm")
	attr(res, "type") <- attr(object, "type")
	attr(res, "mlearning.call") <- attr(object, "mlearning.call")
	res
}
		
print.summary.mlearning <- function (x, ...)
{
	cat("A mlearning object of class ", attr(x, "mlearning.class"), " (",
		attr(x, "algorithm"), "):\n", sep = "")
	type <- attr(x, "type")
	switch(type,
		regression = cat("[regression variant]\n"),
		unsupervised = cat("[unsupervised classification variant]\n"))
	cat("Initial call: ", deparse(attr(x, "mlearning.call")),
		"\n", sep = "")
	X <- x
	class(X) <- class(x)[-1]
	print(X)
	invisible(x)
}

plot.mlearning <- function (x, y, ...)
{
	train <- attr(x, "train")
	response <- attr(x, "response")
	class(x) <- class(x)[-(1:2)]
	plot(x, ...)
}

.membership <- function (x, levels, scale = TRUE)
{
	## Make sure x is a matrix of numerics
	x <- as.matrix(x)
	if (!is.numeric(x))
		stop("'x' must be numeric")
	
	## Make sure all columns are named with names in levels
	nms <- colnames(x)
	if (!length(nms))
		stop("missing column names in 'x'")
	if (any(!nms %in% levels))
		stop("One or more column in 'x' not in 'levels'")
	
	## Add columns of zeros for nonexistent levels
	toAdd <- levels[!levels %in% nms]
	if (length(toAdd)) {
		xAdd <- matrix(0, nrow = NROW(x), ncol = length(toAdd))
		colnames(xAdd) <- toAdd
		x <- cbind(x, xAdd)
	}
	
	## Make sure columns are in the same order as levels
	x <- x[, levels]
	
	## Possibly scale to one, row-wise
	if (isTRUE(as.logical(scale)))
		x <- x / apply(x, 1, sum)
	
	x
}
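
## Worked example (not run): columns are completed with zeros for unused
## levels, reordered to match 'levels', and scaled row-wise to sum to one.
# x <- matrix(c(3, 1, 1, 3), nrow = 2, dimnames = list(NULL, c("a", "b")))
# .membership(x, levels = c("a", "b", "c"))
# #         a    b c
# # [1,] 0.75 0.25 0
# # [2,] 0.25 0.75 0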

.expandFactor <- function (f, n, ndrop)
{
	if (!length(ndrop) || !inherits(ndrop, "exclude")) return(f)
	res <- factor(rep(NA, n), levels = levels(f))
	res[-ndrop] <- f
	res
}
	
.expandMatrix <- function (m, n, ndrop)
{
	if (!length(ndrop) || !inherits(ndrop, "exclude")) return(m)
	res <- matrix(NA, nrow = n, ncol = ncol(m))
	res[-ndrop, ] <- m
	res
}
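
## Worked example (not run): re-insert NAs at the positions dropped by
## na.exclude(), whose "na.action" attribute has class "exclude".
# f <- factor(c("a", "b"))
# ndrop <- structure(2L, class = "exclude")  # row 2 was dropped
# .expandFactor(f, n = 3, ndrop)             # [1] a    <NA> b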

predict.mlearning <- function(object, newdata,
type = c("class", "membership", "both"), method = c("direct", "cv"),
na.action = na.exclude, ...)
{
	## Not usable for unsupervised type
	if (attr(object, "type") == "unsupervised")
		stop("no predict() method for unsupervised version")
	
	## If method == "cv", delegate to cvpredict()
	if (as.character(method)[1] == "cv") {
		if (!missing(newdata))
			stop("cannot handle new data with method = 'cv'")
		return(cvpredict(object = object, type = type, ...))
	}
	
	## Recalculate newdata according to formula...
	if (missing(newdata)) { # Use train
		newdata <- attr(object, "train")
	} else if (attr(object, "optim")) { # Use optimized approach
		## Just keep vars similar as in train
		vars <- names(attr(object, "train"))
		if (!all(vars %in% names(newdata)))
			stop("one or more missing variables in newdata")
		newdata <- newdata[, vars]
	} else { # Use model.frame
		newdata <- model.frame(formula = attr(object, "formula"),
			data = newdata, na.action = na.pass)[, names(attr(object, "train"))]
	}
	## Do we need only numerical predictors
	if (attr(object, "numeric.only"))
		newdata <- sapply(as.data.frame(newdata), as.numeric)
	
	## Determine how many data and perform na.action
	n <- NROW(newdata)
	newdata <- match.fun(na.action)(newdata)
	ndrop <- attr(newdata, "na.action")
		
	## Delegate to the original predict() method
	class(object) <- class(object)[-(1:2)]
	if (attr(object, "type") == "regression")
		return(predict(object, newdata = newdata, ...))
	
	## Otherwise, this is a supervised classification
	type <- as.character(type)[1]
	## Special case for both
	if (type == "both") type <- c("class", "membership")
	## Check that type is supported and look for corresponding type name
	## in original predict() method
	pred.type <- attr(object, "pred.type")
	if (!all(type %in% names(pred.type)))
		stop("unsupported predict type")
	
	if (length(type) == 2) {
		## Special case where we predict both class and membership
		classes <- predict(object, newdata = newdata,
			type = pred.type["class"], ...)
		members <- predict(object, newdata = newdata,
			type = pred.type["membership"], ...)
		## Create a list with both res
		levels <- levels(object)
		return(list(class = .expandFactor(factor(as.character(classes),
			levels = levels), n, ndrop),
			membership = .expandMatrix(.membership(members, levels = levels),
			n, ndrop)))
	} else {
		res <- predict(object, newdata = newdata, type = pred.type[type], ...)
	}
	
	## Rework result according to initial levels (before drop of empty ones)
	res <- switch(type,
		class = .expandFactor(factor(as.character(res), levels = levels(object)),
			n, ndrop),
		membership = .expandMatrix(.membership(res, levels = levels(object)),
			n, ndrop),
		switch(class(res)[1],
			factor = .expandFactor(res, n, ndrop),
			matrix = .expandMatrix(res, n, ndrop),
			res))
		
	res
}
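
## Usage sketch (not run; hypothetical classifier 'm' fitted as above):
# predict(m)                      # classes predicted for the training set
# predict(m, type = "membership") # class membership probabilities
# p <- predict(m, type = "both")  # list with $class and $membership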

cvpredict <- function (object, ...)
	UseMethod("cvpredict")

cvpredict.mlearning <- function(object, type = c("class", "membership", "both"),
cv.k = 10, cv.strat = TRUE, ...)
{
	type <- switch(attr(object, "type"),
		regression = "class", # Another way to ignore 'type' for regressions
		classification = as.character(type)[1],
		stop("works only for classification or regression mlearning objects"))
	
	if (type == "class") {
		predictions <- TRUE
		getmodels <- FALSE
	} else if (type == "membership") {
		predictions <- FALSE
		getmodels <- TRUE
	} else if (type == "both") {
		predictions <- TRUE
		getmodels <- TRUE
	} else stop("type must be 'class', 'membership' or 'both'")
	
	## Create data, using numbers as rownames
	data <- data.frame(.response. = response(object), train(object))
	rn <- rownames(data)
	rownames(data) <- 1:NROW(data)
	
	## The predict() method with ... arguments added to the call
	constructPredict <- function (...) {
		fun <- function (object, newdata) return()
		body(fun) <- as.call(c(list(substitute(predict),
			object = substitute(object), newdata = substitute(newdata)), list(...)))
		fun
	}
	Predict <- constructPredict(...)
	
	## Perform cross-validation for prediction
	args <- attr(object, "args")
	if (!is.list(args)) args <- list()
	args$formula <- substitute(.response. ~ .)
	args$data <- substitute(data)
	args$model <- substitute(mlearning)
	args$method <- attr(object, "method")
	args$predict <- substitute(Predict)
	args$estimator <- "cv"
	args$est.para <- control.errorest(predictions = predictions,
		getmodels = getmodels, k = cv.k, strat = cv.strat)
	est <- do.call(errorest, args)
	
	## Only class
	if (type == "class") {
		res <- est$predictions
	} else {
		## Need to calculate membership
		predCV <- function (x, object, ...) {
			Train <- train(object)
			rownames(Train) <- 1:NROW(Train)
			suppressWarnings(predict(x, newdata =
				Train[-as.numeric(rownames(train(x))), ], ...))
		}
	
	## Apply predict() on all models and collect the results together
		membership <- lapply(est$models, predCV, object = object,
			type = "membership", na.action = na.exclude, ...)
	
		## Concatenate results
		membership <- do.call(rbind, membership)
	
		## Sort in correct order and replace initial rownames
		ord <- as.numeric(rownames(membership))
		## Sometimes, errorest() duplicates one or two items in two models
		## (rounding errors?) => eliminate them here
		notDup <- !duplicated(ord)
		membership <- membership[notDup, ]
		ord <- ord[notDup]
		
		## Restore the original order of the items
		rownames(membership) <- rn[ord]
		pos <- order(ord)
		membership <- membership[pos, ]
	
		if (type == "membership") {
			res <- membership
		} else {  # Need both class and membership
			## Because we don't know who is who in est$predictions in case of
			## duplicated items in est$models, we prefer to recalculate classes
			classes <- unlist(lapply(est$models, predCV, object = object,
				type = "class", na.action = na.exclude, ...))
			classes <- classes[notDup]
			classes <- classes[pos]
			
			## Check that both classes levels are the same!
			if (any(levels(classes) != levels(est$predictions)))
				warning("cross-validated classes do not match")

			res <- list(class = classes, membership = membership)
		}
	}
	
	## Add est object as "method" attribute, without predictions or models
	est$name <- "cross-validation"
	est$predictions <- NULL
	est$models <- NULL
	est$call <- match.call()
	est$strat <- cv.strat
	attr(res, "method") <- est
	
	res
}
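
## Usage sketch (not run; hypothetical classifier 'm'): 10-fold stratified
## cross-validated predictions, e.g. to build a confusion matrix with the
## confusion() function from this package.
# cv <- cvpredict(m, type = "class", cv.k = 10)
# confusion(cv, response(m))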

## Note: ldahist() in MASS (when only one LD) seems to be broken!
mlLda <- function (...)
	UseMethod("mlLda")

mlLda.formula <- function (formula, data, ..., subset, na.action)
	mlearning(formula, data = data, method = "mlLda", model.args =
		list(formula  = formula, data = substitute(data),
		subset = substitute(subset)), call = match.call(), ...,
		subset = subset, na.action = substitute(na.action))

mlLda.default <- function (train, response, ...)
{
	if (!is.factor(response))
		stop("only factor response (classification) accepted for mlLda")

	dots <- list(...)
	.args. <- dots$.args.
	dots$.args. <- NULL
	if (!length(.args.)) .args. <- list(levels = levels(response),
		n = c(initial = NROW(train), final = NROW(train)),
		type = "classification", na.action = "na.pass",
		mlearning.call = match.call(), method = "mlLda")
	
	## Check if there are factor predictors
	if (any(sapply(train, is.factor)))
		warning("force conversion from factor to numeric; may be not optimal or suitable")
	
	## Return a mlearning object
	structure(MASS:::lda.default(x = sapply(train, as.numeric),
		grouping = response, ...), formula = .args.$formula, train = train,
		response = response, levels = .args.$levels, n = .args.$n, args = dots,
		optim = .args.$optim, numeric.only = TRUE, type = .args.$type,
		pred.type = c(class = "class", membership = "posterior", projection = "x"),
		summary = NULL, na.action = .args.$na.action,
		mlearning.call = .args.$mlearning.call, method = .args.$method,
		algorithm = "linear discriminant analysis",
		class = c("mlLda", "mlearning", "lda"))
}

predict.mlLda <- function(object, newdata,
type = c("class", "membership", "both", "projection"), prior = object$prior,
dimension, method = c("plug-in", "predictive", "debiased", "cv"), ...)
{
	if (!inherits(object, "mlLda"))
		stop("'object' must be a 'mlLda' object")
	
	## If method == "cv", delegate to cvpredict()
	method <- as.character(method)[1]
	if (method == "cv") {
		if (!missing(newdata))
			stop("cannot handle new data with method = 'cv'")
		if (missing(dimension)) {
			return(cvpredict(object = object, type = type, prior = prior, ...))	
		} else {
			return(cvpredict(object = object, type = type, prior = prior,
				dimension = dimension, ...))
		}
	}
	
	## Recalculate newdata according to formula...
	if (missing(newdata)) { # Use train
		newdata <- attr(object, "train")
	} else if (attr(object, "optim")) { # Use optimized approach
		## Just keep vars similar as in train
		vars <- names(attr(object, "train"))
		if (!all(vars %in% names(newdata)))
			stop("One or more missing variables in newdata")
		newdata <- newdata[, vars]
	} else { # Use model.frame
		newdata <- model.frame(formula = attr(object, "formula"),
			data = newdata, na.action = na.pass)[, names(attr(object, "train"))]
	}
	## Only numerical predictors
	newdata <- sapply(as.data.frame(newdata), as.numeric)
	
	## dimension
	if (missing(dimension)) {
		dimension <- length(object$svd)
	} else {
		dimension <- min(dimension, length(object$svd))
	}
	
	## Delegate to the MASS predict.lda method
	class(object) <- class(object)[-(1:2)]
	## Suppress warnings, because NAs produce annoying ones!
	res <- suppressWarnings(predict(object, newdata = newdata, prior = prior,
		dimen = dimension, method = method, ...))
	
	## Rework results according to what we want
	switch(as.character(type)[1],
		class = factor(as.character(res$class), levels = levels(object)),
		membership = .membership(res$posterior, levels = levels(object)),
		both = list(class = factor(as.character(res$class),
			levels = levels(object)), membership = .membership(res$posterior,
			levels = levels(object))),
		projection = res$x,
		stop("unrecognized 'type' (must be 'class', 'membership', 'both' or 'projection')"))
}
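
## Usage sketch (not run; hypothetical mlLda object 'm'): project the data
## on the first two discriminant axes, e.g. for plotting.
# proj <- predict(m, type = "projection", dimension = 2)
# plot(proj)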

mlQda <- function (...)
	UseMethod("mlQda")

mlQda.formula <- function (formula, data, ..., subset, na.action)
	mlearning(formula, data = data, method = "mlQda", model.args =
		list(formula  = formula, data = substitute(data),
		subset = substitute(subset)), call = match.call(), ...,
		subset = subset, na.action = substitute(na.action))

mlQda.default <- function (train, response, ...)
{
	if (!is.factor(response))
		stop("only factor response (classification) accepted for mlQda")

	dots <- list(...)
	.args. <- dots$.args.
	dots$.args. <- NULL
	if (!length(.args.)) .args. <- list(levels = levels(response),
		n = c(initial = NROW(train), final = NROW(train)),
		type = "classification", na.action = "na.pass",
		mlearning.call = match.call(), method = "mlQda")
	
	## Check if there are factor predictors
	if (any(sapply(train, is.factor)))
		warning("force conversion from factor to numeric; may be not optimal or suitable")
	
	## Return a mlearning object
	structure(MASS:::qda.default(x = sapply(train, as.numeric),
		grouping = response, ...), formula = .args.$formula, train = train,
		response = response, levels = .args.$levels, n = .args.$n, args = dots,
		optim = .args.$optim, numeric.only = TRUE, type = .args.$type,
		pred.type = c(class = "class", membership = "posterior"),
		summary = NULL, na.action = .args.$na.action,
		mlearning.call = .args.$mlearning.call, method = .args.$method,
		algorithm = "quadratic discriminant analysis",
		class = c("mlQda", "mlearning", "qda"))
}

predict.mlQda <- function(object, newdata, type = c("class", "membership", "both"),
prior = object$prior, method = c("plug-in", "predictive", "debiased", "looCV",
"cv"), ...)
{
	if (!inherits(object, "mlQda"))
		stop("'object' must be a 'mlQda' object")
	
	## If method == "cv", delegate to cvpredict()
	method <- as.character(method)[1]
	if (method == "cv") {
		if (!missing(newdata))
			stop("cannot handle new data with method = 'cv'")
		return(cvpredict(object = object, type = type, prior = prior, ...))
	}
	
	## Recalculate newdata according to formula...
	if (missing(newdata)) { # Use train
		newdata <- attr(object, "train")
	} else if (attr(object, "optim")) { # Use optimized approach
		## Just keep vars similar as in train
		vars <- names(attr(object, "train"))
		if (!all(vars %in% names(newdata)))
			stop("One or more missing variables in newdata")
		newdata <- newdata[, vars]
	} else { # Use model.frame
		newdata <- model.frame(formula = attr(object, "formula"),
			data = newdata, na.action = na.pass)[, names(attr(object, "train"))]
	}
	## Only numerical predictors
	newdata <- sapply(as.data.frame(newdata), as.numeric)
		
	## Delegate to the MASS predict.qda method
	class(object) <- class(object)[-(1:2)]
	## Suppress warnings, because NAs produce annoying ones!
	res <- suppressWarnings(predict(object, newdata = newdata, prior = prior,
		method = method, ...))
	
	## Rework results according to what we want
	switch(as.character(type)[1],
		class = factor(as.character(res$class), levels = levels(object)),
		membership = .membership(res$posterior, levels = levels(object)),
		both = list(class = factor(as.character(res$class),
			levels = levels(object)), membership = .membership(res$posterior,
			levels = levels(object))),
		stop("unrecognized 'type' (must be 'class', 'membership' or 'both')"))
}

mlRforest <- function (...)
	UseMethod("mlRforest")

mlRforest.formula <- function (formula, data, ntree = 500, mtry,
replace = TRUE, classwt = NULL, ..., subset, na.action)
{
	if (missing(mtry)) {
		mlearning(formula, data = data, method = "mlRforest", model.args =
			list(formula  = formula, data = substitute(data),
			subset = substitute(subset)), call = match.call(), ntree = ntree,
			replace = replace, classwt = classwt, ...,
			subset = subset, na.action = substitute(na.action))	
	} else {
		mlearning(formula, data = data, method = "mlRforest", model.args =
			list(formula  = formula, data = substitute(data),
			subset = substitute(subset)), call = match.call(), ntree = ntree,
			mtry = mtry, replace = replace, classwt = classwt, ...,
			subset = subset, na.action = substitute(na.action))	
	}
}

mlRforest.default <- function (train, response, ntree = 500, mtry,
replace = TRUE, classwt = NULL, ...)
{
	dots <- list(...)
	.args. <- dots$.args.
	dots$.args. <- NULL
	if (!length(.args.)) {
		if (!length(response)) {
			type <- "unsupervised"
		} else if (is.factor(response)) {
			type <- "classification"
		} else type <- "regression"
		.args. <- list(levels = levels(response),
		n = c(initial = NROW(train), final = NROW(train)),
		type = type, na.action = "na.pass",
		mlearning.call = match.call(), method = "mlRforest")
	}
	dots$ntree <- ntree
	dots$replace <- replace
	dots$classwt <- classwt
	
	## Return a mlearning object
	if (missing(mtry) || !length(mtry)) {
		res <- randomForest:::randomForest.default(x = train,
		y = response, ntree = ntree, replace = replace,
		classwt = classwt, ...)
	} else {
		dots$mtry <- mtry
		res <- randomForest:::randomForest.default(x = train,
		y = response, ntree = ntree, mtry = mtry, replace = replace,
		classwt = classwt, ...)
	}
	 
	structure(res, formula = .args.$formula, train = train,
		response = response, levels = .args.$levels, n = .args.$n, args = dots,
		optim = .args.$optim, numeric.only = FALSE, type = .args.$type,
		pred.type = c(class = "response", membership = "prob", vote ="vote"),
		summary = NULL, na.action = .args.$na.action,
		mlearning.call = .args.$mlearning.call, method = .args.$method,
		algorithm = "random forest",
		class = c("mlRforest", "mlearning", "randomForest"))
}

predict.mlRforest <- function(object, newdata,
type = c("class", "membership", "both", "vote"), method = c("direct", "oob", "cv"),
...)
{
	type <- as.character(type)[1]
	
	## If method == "cv", delegate to cvpredict()
	method <- as.character(method)[1]
	if (method == "cv") {
		if (!missing(newdata))
			stop("cannot handle new data with method = 'cv'")
		return(cvpredict(object = object, type = type, ...))
	} else if (method == "oob") { # Get out-of-bag prediction!
		if (!missing(newdata))
			stop("you cannot provide newdata with method = 'oob'")
		
		toProps <- function (x, ntree) {
			if (any(x[1, ] > 1)) {
				res <- t(apply(x, 1, "/", ntree))
			} else res <- x
			class(res) <- "matrix"
			res
		}

		toVotes <- function (x, ntree) {
			if (any(x[1, ] < ntree - 1)) {
				res <- round(t(apply(x, 1, "*", ntree)))
			} else res <- x
			class(res) <- "matrix"
			res
		}
		
		res <- switch(type,
			class = factor(as.character(object$predicted),
				levels = levels(object)),
			membership = .membership(toProps(object$votes, object$ntree),
				levels = levels(object)),
			both = list(class = factor(as.character(object$predicted),
				levels = levels(object)),
				membership = .membership(toProps(object$votes, object$ntree),
				levels = levels(object))),
			vote = .membership(toVotes(object$votes, object$ntree),
						levels = levels(object)),
			stop("unknown type, must be 'class', 'membership', 'both' or 'vote'"))
		
		attr(res, "method") <- list(name = "out-of-bag")
		res
		
	} else predict.mlearning(object = object, newdata = newdata,
		type = type, norm.votes = FALSE, ...)
}
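
## Usage sketch (not run): out-of-bag predictions need no new data and give
## a less biased error estimate than predicting the training set directly.
# rf <- mlRforest(Species ~ ., data = iris, ntree = 50)
# predict(rf, method = "oob")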

mlNnet <- function (...)
	UseMethod("mlNnet")

mlNnet.formula <- function (formula, data, size = NULL, rang = NULL, decay = 0,
maxit = 1000, ..., subset, na.action)
	mlearning(formula, data = data, method = "mlNnet", model.args =
		list(formula  = formula, data = substitute(data),
		subset = substitute(subset)), call = match.call(), size = size,
		rang = rang, decay = decay, maxit = maxit, ...,
		subset = subset, na.action = substitute(na.action))

mlNnet.default <- function (train, response, size = NULL, rang = NULL, decay = 0,
maxit = 1000, ...)
{
	if (!length(response))
		stop("unsupervised classification not usable for mlNnet")

	nnetArgs <- dots <- list(...)
	.args. <- nnetArgs$.args.
	dots$.args. <- NULL
	dots$size <- size
	dots$rang <- rang
	dots$decay <- decay
	dots$maxit <- maxit
	nnetArgs$.args. <- NULL
	if (!length(.args.)) .args. <- list(levels = levels(response),
		n = c(initial = NROW(train), final = NROW(train)),
		type = if (is.factor(response)) "classification" else "regression",
		na.action = "na.pass", mlearning.call = match.call(), method = "mlNnet")
	
	## Construct arguments list for nnet() call
	nnetArgs$x <- sapply(train, as.numeric)
	
	## Weights
	if (!length(nnetArgs$weights)) nnetArgs$weights <- .args.$weights
	
	## size
	if (!length(size))
		size <- length(levels(response)) - 1 # Is this a reasonable default?
	nnetArgs$size <- size
	
	## rang
	if (!length(rang)) {
		## The default is 0.7 in the original nnet code,
		## but the documentation suggests scaling it to the data
		rang <- round(1 / max(abs(nnetArgs$x)), 2)
		if (rang < 0.01) rang <- 0.01
		if (rang > 0.7) rang <- 0.7
	}
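	## e.g., max(abs(nnetArgs$x)) == 50 gives rang = round(1/50, 2) = 0.02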
	nnetArgs$rang <- rang
	
	## decay and maxit
	nnetArgs$decay <- decay
	nnetArgs$maxit <- maxit
			
	## TODO: should this be implemented?
	#x <- model.matrix(Terms, m, contrasts)
	#cons <- attr(x, "contrast")
	#xint <- match("(Intercept)", colnames(x), nomatch = 0L)
	#if (xint > 0L) 
	#    x <- x[, -xint, drop = FALSE]
			
	## Classification or regression?
	if (is.factor(response)) {
		if (length(levels(response)) == 2L) {
			nnetArgs$y <- as.vector(unclass(response)) - 1
			nnetArgs$entropy <- TRUE
			res <- do.call(nnet.default, nnetArgs)
			res$lev <- .args.$levels
		} else {
			nnetArgs$y <- class.ind(response)
			nnetArgs$softmax <- TRUE
			res <- do.call(nnet.default, nnetArgs)
			res$lev <- .args.$levels
		}
	} else { # Regression
		nnetArgs$y <- response
		res <- do.call(nnet.default, nnetArgs)	
	}
	
	## Return a mlearning object
	structure(res, formula = .args.$formula, train = train,
		response = response, levels = .args.$levels, n = .args.$n, args = dots,
		optim = .args.$optim, numeric.only = TRUE, type = .args.$type,
		pred.type = c(class = "class", membership = "raw"),
		summary = "summary", na.action = .args.$na.action,
		mlearning.call = .args.$mlearning.call, method = .args.$method,
		algorithm = "single-hidden-layer neural network",
		class = c("mlNnet", "mlearning", "nnet"))
}

mlLvq <- function (...)
	UseMethod("mlLvq")

mlLvq.formula <- function (formula, data, k.nn = 5, size, prior,
algorithm = "olvq1", ..., subset, na.action)
{
	if (missing(size)) {
		if (missing(prior)) {
			mlearning(formula, data = data, method = "mlLvq", model.args =
				list(formula  = formula, data = substitute(data),
				subset = substitute(subset)), call = match.call(), k.nn = k.nn,
				algorithm = algorithm, ...,
				subset = subset, na.action = substitute(na.action))
		} else {
			mlearning(formula, data = data, method = "lvq", model.args =
				list(formula  = formula, data = substitute(data),
				subset = substitute(subset)), call = match.call(), k.nn = k.nn,
				prior = prior, algorithm = algorithm, ...,
				subset = subset, na.action = substitute(na.action))
		}
	} else {
		if (missing(prior)) {
			mlearning(formula, data = data, method = "lvq", model.args =
				list(formula  = formula, data = substitute(data),
				subset = substitute(subset)), call = match.call(), k.nn = k.nn,
				size = size, algorithm = algorithm, ...,
				subset = subset, na.action = substitute(na.action))
		} else {
			mlearning(formula, data = data, method = "lvq", model.args =
				list(formula  = formula, data = substitute(data),
				subset = substitute(subset)), call = match.call(), k.nn = k.nn,
				size = size, prior = prior, algorithm = algorithm, ...,
				subset = subset, na.action = substitute(na.action))
		}
	}
}

mlLvq.default <- function (train, response, k.nn = 5, size, prior,
algorithm = "olvq1", ...)
{
	if (!is.factor(response))
		stop("only factor response (classification) accepted for mlLvq")

	dots <- list(...)
	.args. <- dots$.args.
	dots$.args. <- NULL
	dots$k.nn <- k.nn
	dots$algorithm <- algorithm
	if (!length(.args.)) .args. <- list(levels = levels(response),
		n = c(initial = NROW(train), final = NROW(train)),
		type = "classification", na.action = "na.pass",
		mlearning.call = match.call(), method = "mlQda")
	
	## matrix of numeric values
	if (any(sapply(train, is.factor))) {
		warning("force conversion from factor to numeric; may be not optimal or suitable")
		train <- sapply(train, as.numeric)
	}
			
	## Default values for size and prior, if not provided
	n <- nrow(train)
	if (missing(prior) || !length(prior)) {
		prior <- tapply(rep(1, length(response)), response, sum) / n
	} else dots$prior <- prior
	if (missing(size) || !length(size)) {
		np <- length(prior)  
		size <- min(round(0.4 * np * (np - 1 + ncol(train) / 2), 0), n)
	} else dots$size <- size
			
	## Initialize codebook
	init <- lvqinit(train, response, k = k.nn, size = size, prior = prior)
	
	## Calculate final codebook
	if (algorithm == "olvq1") times <- 40 else times <- 100
	niter <- dots$niter
	if (!length(niter)) niter <- times * nrow(init$x) # Default value
	alpha <- dots$alpha
	if (!length(alpha)) alpha <- if (algorithm == "olvq1") 0.3 else 0.03
	win <- dots$win
	if (!length(win)) win <- 0.3
	epsilon <- dots$epsilon
	if (!length(epsilon)) epsilon <- 0.1
	codebk <- switch(algorithm,
		olvq1 = olvq1(train, response, init, niter = niter,
			alpha = alpha),
		lvq1 = lvq1(train, response, init, niter = niter,
			alpha = alpha),
		lvq2 = lvq2(train, response, init, niter = niter,
			alpha = alpha, win = win),
		lvq3 = lvq3(train, response, init, niter = niter,
			alpha = alpha, win = win, epsilon = epsilon),
		stop("algorithm must be 'lvq1', 'lvq2', 'lvq3' or 'olvq1'"))
				
	## Return a mlearning object
	structure(codebk, formula = .args.$formula, train = train,
		response = response, levels = .args.$levels, n = .args.$n, args = dots,
		optim = .args.$optim, numeric.only = TRUE, type = .args.$type,
		pred.type = c(class = "class"), summary = "summary.lvq",
		na.action = .args.$na.action,
		mlearning.call = .args.$mlearning.call, method = .args.$method,
		algorithm = "learning vector quantization",
		class = c("mlLvq", "mlearning", class(codebk)))
}

summary.lvq <- function (object, ...)
	structure(cbind(Class = object$cl, as.data.frame(object$x)),
		class = c("summary.lvq", "data.frame"))

print.summary.lvq <- function (x, ...)
{
	cat("Codebook:\n")
	print(as.data.frame(x))
	invisible(x)
}

predict.mlLvq <- function (object, newdata, type = "class",
method = c("direct", "cv"), na.action = na.exclude, ...)
{
	if (!inherits(object, "mlLvq"))
		stop("'object' must be a 'mlLvq' object")
	if (type != "class") stop("Only 'class' currently supported for type")
    
	## If method == "cv", delegate to cvpredict()
	if (as.character(method)[1] == "cv") {
		if (!missing(newdata))
			stop("cannot handle new data with method = 'cv'")
		return(cvpredict(object = object, type = type, ...))
	}
	
	## Recalculate newdata according to formula...
	if (missing(newdata)) { # Use train
		newdata <- attr(object, "train")
	} else if (attr(object, "optim")) { # Use optimized approach
		## Just keep vars similar as in train
		vars <- colnames(attr(object, "train"))
		if (!all(vars %in% names(newdata)))
			stop("one or more missing variables in newdata")
		newdata <- newdata[, vars]
	} else { # Use model.frame
		newdata <- model.frame(formula = attr(object, "formula"),
			data = newdata, na.action = na.pass)[, names(attr(object, "train"))]
	}
	newdata <- sapply(as.data.frame(newdata), as.numeric)
	
	## Determine how many data and perform na.action
	n <- NROW(newdata)
	newdata <- match.fun(na.action)(newdata)
	ndrop <- attr(newdata, "na.action")
	
	.expandFactor(lvqtest(object, newdata), n, ndrop)
}
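
## Usage sketch (not run): LVQ only returns classes, no membership.
# lvq <- mlLvq(Species ~ ., data = iris)
# predict(lvq)  # type = "class" is the only option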

## svm from e1071 package
mlSvm <- function (...)
	UseMethod("mlSvm")

mlSvm.formula <- function(formula, data, scale = TRUE, type = NULL,
kernel = "radial", classwt = NULL, ..., subset, na.action)
	mlearning(formula, data = data, method = "mlSvm", model.args =
		list(formula  = formula, data = substitute(data),
		subset = substitute(subset)), call = match.call(),
		..., subset = subset, na.action = substitute(na.action))

mlSvm.default <- function (train, response, scale = TRUE, type = NULL,
kernel = "radial", classwt = NULL, ...)
{
	dots <- list(...)
	.args. <- dots$.args.
	dots$.args. <- NULL
	if (!length(.args.)) {
		if (is.factor(response)) {
			Type <- "classification"
		} else Type <- "regression"
		.args. <- list(levels = levels(response),
		n = c(initial = NROW(train), final = NROW(train)),
		type = Type, na.action = "na.pass",
		mlearning.call = match.call(), method = "mlSvm")
	}
	dots$scale <- scale
	dots$type <- type
	dots$kernel <- kernel
	dots$class.weights <- classwt
	#dots$probability <- TRUE
	
	## Return a mlearning object
	structure(e1071:::svm.default(x = sapply(train, as.numeric), y = response,
		scale = scale, type = type, kernel = kernel, class.weights = classwt,
		probability = TRUE, ...), formula = .args.$formula, train = train,
		response = response, levels = .args.$levels, n = .args.$n, args = dots,
		optim = .args.$optim, numeric.only = TRUE, type = .args.$type,
		pred.type = c(class = "class", membership = "raw"),
		summary = "summary", na.action = .args.$na.action,
		mlearning.call = .args.$mlearning.call, method = .args.$method,
		algorithm = "support vector machine",
		class = c("mlSvm", "mlearning", "svm"))
}

predict.mlSvm <- function(object, newdata,
type = c("class", "membership", "both"), method = c("direct", "cv"),
na.action = na.exclude, ...)
{
	if (!inherits(object, "mlSvm"))
		stop("'object' must be a 'mlSvm' object")
	
	## If method == "cv", delegate to cvpredict()
	method <- as.character(method)[1]
	if (method == "cv") {
		if (!missing(newdata))
			stop("cannot handle new data with method = 'cv'")
		return(cvpredict(object = object, type = type, ...))	
	}
	
	## Recalculate newdata according to formula...
	if (missing(newdata)) { # Use train
		newdata <- attr(object, "train")
	} else if (attr(object, "optim")) { # Use optimized approach
		## Just keep vars similar as in train
		vars <- names(attr(object, "train"))
		if (!all(vars %in% names(newdata)))
			stop("One or more missing variables in newdata")
		newdata <- newdata[, vars]
	} else { # Use model.frame
		newdata <- model.frame(formula = attr(object, "formula"),
			data = newdata, na.action = na.pass)[, names(attr(object, "train"))]
	}
	## Only numerical predictors
	newdata <- sapply(as.data.frame(newdata), as.numeric)
	
	## Determine how many data and perform na.action
	n <- NROW(newdata)
	newdata <- match.fun(na.action)(newdata)
	ndrop <- attr(newdata, "na.action")
	attr(newdata, "na.action") <- NULL
		
	## Delegate to the e1071 predict.svm method
	if (as.character(type)[1] == "class") proba <- FALSE else proba <- TRUE
	class(object) <- class(object)[-(1:2)]
	if (attr(object, "type") == "regression")
		return(predict(object, newdata = newdata, ...))
	
	## This is for classification
	res <- predict(object, newdata = newdata,
		probability = proba, ...)
	proba <- attr(res, "probabilities")	
	
	## Rework results according to what we want
	switch(as.character(type)[1],
		class = .expandFactor(factor(as.character(res), levels = levels(object)),
			n, ndrop),
		membership = .expandMatrix(.membership(proba, levels = levels(object)),
			n, ndrop),
		both = list(class = .expandFactor(factor(as.character(res),
			levels = levels(object)), n, ndrop),
			membership = .expandMatrix(.membership(proba, levels = levels(object)),
			n, ndrop)),
		stop("unrecognized 'type' (must be 'class', 'membership' or 'both')"))
}

## NaiveBayes from e1071 package
mlNaiveBayes <- function (...)
	UseMethod("mlNaiveBayes")

mlNaiveBayes.formula <- function(formula, data, laplace = 0, ...,
subset, na.action)
	mlearning(formula, data = data, method = "mlNaiveBayes", model.args =
		list(formula  = formula, data = substitute(data),
		subset = substitute(subset)), call = match.call(), laplace = laplace,
		..., subset = subset, na.action = substitute(na.action))

mlNaiveBayes.default <- function (train, response, laplace = 0, ...)
{
	if (!is.factor(response))
		stop("only factor response (classification) accepted for mlNaiveBayes")

	dots <- list(...)
	.args. <- dots$.args.
	dots$.args. <- NULL
	dots$laplace <- laplace
	if (!length(.args.)) .args. <- list(levels = levels(response),
		n = c(initial = NROW(train), final = NROW(train)),
		type = "classification", na.action = "na.pass",
		mlearning.call = match.call(), method = "mlNaiveBayes")
	
	## Return a mlearning object
	structure(e1071:::naiveBayes.default(x = train, y = response,
		laplace = laplace, ...), formula = .args.$formula, train = train,
		response = response, levels = .args.$levels, n = .args.$n, args = dots,
		optim = .args.$optim, numeric.only = FALSE, type = .args.$type,
		pred.type = c(class = "class", membership = "raw"),
		summary = NULL, na.action = .args.$na.action,
		mlearning.call = .args.$mlearning.call, method = .args.$method,
		algorithm = "naive Bayes classifier",
		class = c("mlNaiveBayes", "mlearning", "naiveBayes"))
}
	
## NaiveBayes from RWeka package
## TODO: keep this for mlearningWeka package!
#mlNaiveBayesWeka <- function (...)
#	UseMethod("mlNaiveBayesWeka")
#
#mlNaiveBayesWeka.formula <- function(formula, data, ..., subset, na.action)
#	mlearning(formula, data = data, method = "mlNaiveBayesWeka", model.args =
#		list(formula  = formula, data = substitute(data),
#		subset = substitute(subset)), call = match.call(),
#		..., subset = subset, na.action = substitute(na.action))
#
#mlNaiveBayesWeka.default <- function (train, response, ...)
#{
#	if (!is.factor(response))
#		stop("only factor response (classification) accepted for mlNaiveBayesWeka")
#
#	.args. <- dots <- list(...)$.args.
#	if (!length(.args.)) .args. <- list(levels = levels(response),
#		n = c(initial = NROW(train), final = NROW(train)),
#		type = "classification", na.action = "na.pass",
#		mlearning.call = match.call(), method = "mlNaiveBayesWeka")
#	
#	wekaArgs <- list(control = .args.$control)
#	
#	## If response is not NULL, add it to train
#	if (length(response)) {
#		formula <- .args.$formula
#		if (!length(formula)) response.label <- "Class" else
#			response.label <- all.vars(formula)[1]
#		data <- data.frame(response, train)
#		names(data) <- c(response.label, colnames(train))
#		wekaArgs$data <- data
#		wekaArgs$formula <- as.formula(paste(response.label, "~ ."))
#	} else { # Unsupervised classification
#		wekaArgs$data <- train
#		wekaArgs$formula <- ~ . 
#	}
#	
#	WekaClassifier <- make_Weka_classifier("weka/classifiers/bayes/NaiveBayes")
#	
#	## Return a mlearning object
#	structure(do.call(WekaClassifier, wekaArgs), formula = .args.$formula,
#		train = train, response = response, levels = .args.$levels, n = .args.$n,
#		args = dots, optim = .args.$optim, numeric.only = FALSE,
#		type = .args.$type, pred.type = c(class = "class", membership = "probability"),
#		summary = "summary", na.action = .args.$na.action,
#		mlearning.call = .args.$mlearning.call, method = .args.$method,
#		algorithm = "Weka naive Bayes classifier",
#		class = c("mlNaiveBayesWeka", "mlearning", "Weka_classifier"))
#}
