R/oslda.R
In locClass: Collection of Local Classification Methods

Documented in oslda oslda.formula predict.oslda

#  Copyright (C) 2011 J. Schiffner
#  Copyright (C) 1994-2004 W. N. Venables and B. D. Ripley
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 or 3 of the License
#  (at your option).
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  http://www.r-project.org/Licenses/
#

#' A localized version of Linear Discriminant Analysis.
#'
#' This is an alternative implementation of Local Linear Discriminant Analysis proposed by Czogiel et al. (2007) and 
#' implemented in \code{\link[klaR]{loclda}} in package \pkg{klaR}.
#'
#' The name of the window function (\code{wf}) can be specified as a character string.
#' In this case the window function is generated internally in \code{predict.oslda}. Currently
#' supported are \code{"biweight"}, \code{"cauchy"}, \code{"cosine"}, \code{"epanechnikov"}, 
#' \code{"exponential"}, \code{"gaussian"}, \code{"optcosine"}, \code{"rectangular"} and 
#' \code{"triangular"}.
#'
#' Moreover, it is possible to generate the window functions mentioned above in advance 
#' (see \code{\link[=biweight]{wfs}}) and pass them to \code{oslda}. 
#'
#' Any other function implementing a window function can also be used as \code{wf} argument.
#' This allows the user to try own window functions.
#' See help on \code{\link[=biweight]{wfs}} for details.
#'
#' If the predictor variables include factors, the formula interface must be used in order 
#' to get a correct model matrix.
#'
#' @title Observation Specific Linear Discriminant Analysis
#'
#' @param formula A \code{formula} of the form \code{groups ~ x1 + x2 + \dots}, that is, the response
#' is the grouping \code{factor} and the right hand side specifies the (non-\code{factor})
#' discriminators.  
#' @param data A \code{data.frame} from which variables specified in \code{formula} are to be taken.
#' @param x (Required if no \code{formula} is given as principal argument.) A \code{matrix} or \code{data.frame} or \code{Matrix} containing the explanatory variables.
#' @param grouping (Required if no \code{formula} is given as principal argument.) A \code{factor} specifying
#' the class membership for each observation.
#' @param wf A window function which is used to calculate weights that are introduced into 
#' the fitting process. Either a character string or a function, e.g. \code{wf = function(x) exp(-x)}.
#' For details see the documentation for \code{\link[=biweight]{wfs}}.
#' @param bw (Required only if \code{wf} is a string.) The bandwidth parameter of the window function. (See \code{\link[=biweight]{wfs}}.)
#' @param k (Required only if \code{wf} is a string.) The number of nearest neighbors of the decision boundary to be used in the fitting process. (See \code{\link[=biweight]{wfs}}.)
#' @param nn.only (Required only if \code{wf} is a string indicating a window function with infinite support and if \code{k} is specified.) Should
#' only the \code{k} nearest neighbors or all observations receive positive weights? (See \code{\link[=biweight]{wfs}}.)
#' @param method Method for scaling the pooled weighted covariance matrix, either \code{"unbiased"} or maximum-likelihood (\code{"ML"}). Defaults to \code{"unbiased"}.
#' @param \dots Further arguments.
#' @param subset An index vector specifying the cases to be used in the training sample. (NOTE: If given, this argument must be named.) 
#' @param na.action A function to specify the action to be taken if NAs are found. The default action is first
#' the \code{na.action} setting of \code{\link{options}} and second \code{\link{na.fail}} if that is unset. 
#' An alternative is \code{\link{na.omit}}, which leads to rejection of cases with missing values on any required 
#' variable. (NOTE: If given, this argument must be named.)
#'
#' @return An object of class \code{"oslda"}, a \code{list} containing the following components:
#'   \item{x}{A \code{matrix} containing the explanatory variables.}
#'   \item{grouping}{A \code{factor} specifying the class membership for each observation.}
#'   \item{counts}{The number of observations per class.}
#'   \item{lev}{The class labels (levels of \code{grouping}).}  
#'   \item{N}{The number of observations.}
#'   \item{wf}{The window function used. Always a function, even if the input was a string.}
#'   \item{bw}{(Only if \code{wf} is a string or was generated by means of one of the functions documented in \code{\link[=biweight]{wfs}}.) 
#'	  The bandwidth used, \code{NULL} if \code{bw} was not specified.}
#' 	 \item{k}{(Only if \code{wf} is a string or was generated by means of one of the functions documented in \code{\link[=biweight]{wfs}}.) 
#'	  The number of nearest neighbors used, \code{NULL} if \code{k} was not specified.}
#'   \item{nn.only}{(Logical. Only if \code{wf} is a string or was generated by means of one of the functions documented in \code{\link[=biweight]{wfs}} and if \code{k} was
#'	 specified.) \code{TRUE} if only the \code{k} nearest neighbors recieve a positive weight, \code{FALSE} otherwise.}
#'   \item{adaptive}{(Logical.) \code{TRUE} if the bandwidth of \code{wf} is adaptive to the local density of data points, \code{FALSE} if the bandwidth
#'	  is fixed.}
#'   \item{method}{The method for scaling the weighted covariance matrices, either \code{"unbiased"} or \code{"ML"}.}
#'   \item{variant}{(Only if \code{wf} is a string or one of the window functions documented in \code{\link[=biweight]{wfs}} is used, for internal use only). 
#'	  An integer indicating which weighting scheme is implied by \code{bw}, \code{k} and \code{nn.only}.}
#'   \item{call}{The (matched) function call.}
#'
#' @references 
#' Czogiel, I., Luebke, K., Zentgraf, M. and Weihs, C. (2007), Localized linear discriminant analysis.
#' In Decker, R. and Lenz, H.-J., editors, Advances in Data Analysis, volume 33 of Studies in Classification,
#' Data Analysis, and Knowledge Organization, pages 133--140, Springer, Berlin Heidelberg.
#'
#' @seealso \code{\link{predict.oslda}}.
#'
#'
#' @keywords classif multivariate
#'
#' @aliases oslda oslda.data.frame oslda.default oslda.formula oslda.matrix
#'
#' @export

oslda <- function(x, ...)
	UseMethod("oslda")
	
	
	
#' @rdname oslda
#' @method oslda formula
#'
#' @S3method oslda formula

oslda.formula <- function(formula, data, ..., subset, na.action) {
    m <- match.call(expand.dots = FALSE)
    m$... <- NULL
    m[[1L]] <- as.name("model.frame")
    m <- eval.parent(m)
    Terms <- attr(m, "terms")
    grouping <- model.response(m)
    x <- model.matrix(Terms, m)
    xint <- match("(Intercept)", colnames(x), nomatch = 0L)
    if (xint > 0) 
		x <- x[, -xint, drop = FALSE]
    res <- oslda.default(x, grouping, ...)
    res$terms <- Terms
    cl <- match.call()
    cl[[1L]] <- as.name("oslda")
    res$call <- cl
    res$contrasts <- attr(x, "contrasts")
    res$xlevels <- .getXlevels(Terms, m)
    res$na.action <- attr(m, "na.action")
    res
}



#' @rdname oslda
#' @method oslda data.frame
#'
#' @S3method oslda data.frame

oslda.data.frame <- function (x, ...) {
	res <- oslda(structure(data.matrix(x, rownames.force = TRUE), class = "matrix"), ...)
	cl <- match.call()
	cl[[1L]] <- as.name("oslda")
	res$call <- cl
	res
}



#' @rdname oslda
#' @method oslda matrix
#'
#' @S3method oslda matrix

oslda.matrix <- function (x, grouping, ..., subset, na.action = na.fail) {
    if (!missing(subset)) {
        x <- x[subset, , drop = FALSE]
        grouping <- grouping[subset]
    }
    if (missing(na.action)) {
        if (!is.null(naa <- getOption("na.action")))    # if options(na.action = NULL) the default of na.action comes into play
            if (!is.function(naa))
            	na.action <- get(naa, mode = "function")
            else
                na.action <- naa
    } 
    dfr <- na.action(structure(list(g = grouping, x = x), class = "data.frame", row.names = rownames(x)))
    grouping <- dfr$g
    x <- dfr$x
    res <- oslda.default(x, grouping, ...)
    cl <- match.call()
    cl[[1L]] <- as.name("oslda")
    res$call <- cl
	res$na.action <- na.action
    res
}



#' @rdname oslda
#' @method oslda default
#'
#' @S3method oslda default

oslda.default <- function (x, grouping, wf = c("biweight", "cauchy", "cosine", "epanechnikov", 
	"exponential", "gaussian", "optcosine", "rectangular", "triangular"), bw, k, nn.only = TRUE, 
	method = c("unbiased", "ML"), ...) {
	if (is.null(dim(x))) 
        stop("'x' is not a matrix")
    x <- as.matrix(x)
    n <- nrow(x)
    if (any(!is.finite(x))) 
        stop("infinite, NA or NaN values in 'x'")
    if (nrow(x) != length(grouping)) 
        stop("nrow(x) and length(grouping) are different")
    if (!is.factor(grouping))
    	warning("'grouping' was coerced to a factor")
    g <- as.factor(grouping)
    lev <- lev1 <- levels(g)
    counts <- as.vector(table(g))
    if (any(counts == 0)) {
        empty <- lev[counts == 0]
        warning(sprintf(ngettext(length(empty), "group %s is empty", 
            "groups %s are empty"), paste(empty, collapse = ", ")), 
            domain = NA)
        lev1 <- lev[counts > 0]
        g <- factor(g, levels = lev1)
        counts <- as.vector(table(g))
    }
	names(counts) <- lev1
    if (length(lev1) == 1L)
    	stop("training data from only one group given")
	method <- match.arg(method)
	## checks on k and bw
    if (is.character(wf)) {
    	m <- match.call(expand.dots = FALSE)
    	m$n <- n
    	m[[1L]] <- as.name("checkwf")
    	check <- eval.parent(m)
    	cl <- match.call()
    	cl[[1]] <- as.name("oslda")
    	return(structure(list(x = x, grouping = g, counts = counts, lev = lev, N = n, wf = check$wf, bw = check$bw, k = check$k, nn.only = check$nn.only, 
    		adaptive = check$adaptive, method = method, variant = check$variant, call = cl), class = "oslda"))
    } else if (is.function(wf)) {
    	if (!missing(k))
    		warning("argument 'k' is ignored")
    	if (!missing(bw))
    		warning("argument 'bw' is ignored")
    	if (!missing(nn.only))
    		warning("argument 'nn.only' is ignored")
    	if (!is.null(attr(wf, "adaptive"))) {
    		if (attr(wf, "adaptive")) {			# adaptive bandwidth
     				if (attr(wf, "k") + 1 > n)
    					stop("'k + 1' is larger than 'nrow(x)'")
     				if (attr(wf, "nn.only")) 	# only knn
    					variant <- 3
    				else						# all observations
    					variant <- 4
#    				if (attr(wf, "name") == "rectangular" && attr(wf, "k") == n) { # todo
#    					warning("nonlocal solution")
#    					variant <- 0
#					}   					
    		} else {							# fixed bandwidth
    			if (!is.null(attr(wf, "k"))) {
    				if (attr(wf, "k") > n)
    					stop("'k' is larger than 'nrow(x)'")
    				variant <- 2
    			} else 
    				variant <- 1
    		}
    	} else
    		variant <- NULL
    	cl <- match.call()
    	cl[[1]] <- as.name("oslda")
    	return(structure(list(x = x, grouping = g, counts = counts, lev = lev, N = n, wf = wf, bw = attr(wf, "bw"), k = attr(wf, "k"), 
    		nn.only = attr(wf, "nn.only"), adaptive = attr(wf, "adaptive"), method = method, variant = variant, call = cl), class = "oslda"))
    } else
		stop("argument 'wf' has to be either a character or a function")
}	
	


# @param x An \code{oslda} object.
# @param \dots Further arguments to \code{\link{print}}.
#
#' @method print oslda
#' @noRd
#'
#' @S3method print oslda

print.oslda <- function(x, ...) {
    if (!is.null(cl <- x$call)) {
        names(cl)[2L] <- ""
        cat("Call:\n")
        dput(cl, control = NULL)
    }
    if(is.character(x$wf)) {
	    cat("\nWindow function: ")
        cat(x$wf, sep="\n")  
    } else {
        cat("\nWindow function:\n")
	    cat(deparse(x$wf), sep = "\n")  
    }
    if(!is.null(x$bw))
        cat("Bandwidth: ", x$bw, "\n")
    if(!is.null(x$k))
        cat("k: ", x$k, "\n")
    if(!is.null(x$nn.only))
        cat("Nearest neighbors only: ", x$nn.only, "\n")
    if(!is.null(x$adaptive))
        cat("Adaptive bandwidth: ", x$adaptive, "\n")
	invisible(x)
}



#' Classify multivariate observations in conjunction with \code{\link{oslda}}.
#'
#' This function is a method for the generic function \code{predict()} for class 
#' \code{"oslda"}. 
#' It can be invoked by calling \code{predict(x)} for an object \code{x} of the 
#' appropriate class, or directly by calling \code{predict.oslda(x)} regardless of 
#' the class of the object. 
#'
#' @title Classify Multivariate Observations Based on Observation Specific Linear Discriminant Analysis
#'
#' @param object Object of class \code{"oslda"}.
#' @param newdata A \code{data.frame} of cases to be classified or, if \code{object} has a
#' \code{formula}, a \code{data.frame} with columns of the same names as the
#' variables used. A vector will be interpreted as a row
#' vector. If \code{newdata} is missing, an attempt will be made to
#' retrieve the data used to fit the \code{oslda} object.
#' @param \dots Further arguments.
#'
#' @return A \code{list} with components:
#' \item{class}{The predicted class labels (a \code{factor}).}
#' \item{posteriors}{Matrix of class posterior probabilities.}
#'
#' @seealso \code{\link{oslda}}.
#'
#' 
#' @keywords classif
#' 
#' @method predict oslda
#' @rdname predict.oslda
#'
#' @S3method predict oslda
#'
#' @useDynLib locClass


## todo: na.action
predict.oslda <- function(object, newdata, ...) {	
    if (!inherits(object, "oslda")) 
        stop("object not of class \"oslda\"")
    if (missing(newdata)) {
    	x <- object$x
    } else {
	    if (!is.null(Terms <- object$terms)) {
	        newdata <- model.frame(as.formula(delete.response(Terms)), 
    	        newdata, na.action = function(x) x, xlev = object$xlevels)
       	 	x <- model.matrix(delete.response(Terms), newdata, contrasts = object$contrasts)
        	xint <- match("(Intercept)", colnames(x), nomatch = 0)
        	if (xint > 0) 
            	x <- x[, -xint, drop = FALSE]
    	} else {
	        if (is.null(dim(newdata))) 
    	        dim(newdata) <- c(1, length(newdata))
        	x <- as.matrix(newdata)
    	}	
    }
    # if (!is.null(Terms <- object$terms)) {
        # if (missing(newdata)) 
            # newdata <- model.frame(object)
        # else {
            # newdata <- model.frame(as.formula(delete.response(Terms)), 
                # newdata, na.action = function(x) x, xlev = object$xlevels)
        # }
        # x <- model.matrix(delete.response(Terms), newdata, contrasts = object$contrasts)
        # xint <- match("(Intercept)", colnames(x), nomatch = 0)
        # if (xint > 0) 
            # x <- x[, -xint, drop = FALSE]
    # }
    # else {
        # if (missing(newdata)) {
            # if (!is.null(sub <- object$call$subset)) 
                # newdataa <- eval.parent(parse(text = paste(deparse(object$call$x, 
                  # backtick = TRUE), "[", deparse(sub, backtick = TRUE), 
                  # ",]")))
            # else newdata <- eval.parent(object$call$x)
            # if (!is.null(nas <- object$call$na.action)) 
                # newdata <- eval(call(nas, newdata))
        # }
        # if (is.null(dim(newdata))) 
            # dim(newdata) <- c(1, length(newdata))
        # x <- as.matrix(newdata)
    # }
    methods <- c("unbiased", "ML")
    object$method <- match(object$method, methods)
	wfs <- c("biweight", "cauchy", "cosine", "epanechnikov", "exponential", "gaussian",
		"optcosine", "rectangular", "triangular")
	if (is.function(object$wf) && !is.null(attr(object$wf,"name")) && attr(object$wf, "name") %in% wfs)
		object$wf <- attr(object$wf, "name")	
    if (is.character(object$wf)) {
		wfs <- paste(wfs, rep(1:3, each = length(wfs)), sep = "")
		wfs <- c(wfs, "cauchy4", "exponential4", "gaussian4")
		object$wf <- paste(object$wf, object$variant, sep = "")
		object$wf <- match(object$wf, wfs)
	}
    posterior <- .Call("predoslda", x, object$x, object$grouping, object$wf, ifelse(is.integer(object$wf) && !is.null(object$bw), object$bw, 0), 
    	ifelse(is.integer(object$wf) && !is.null(object$k), as.integer(object$k), 0L), object$method, new.env())
	lev1 <- levels(object$grouping)	# class labels that are in training data
    gr <- factor(lev1[max.col(posterior)], levels = object$lev)
    names(gr) <- rn <- rownames(x)
    if (is.null(rn)) {
		rn <- seq_along(gr)
		names(gr) <- rn
		rownames(posterior) <- rn    	
    }
    posterior <- exp(posterior - apply(posterior, 1L, max, na.rm = TRUE))
    posterior <- posterior/rowSums(posterior)
    return(list(class = gr, posterior = posterior))
}