R/distancematrix.R

#'Distance matrix
#'
#'The distancematrix function is used to reformat the input distance matrix
#'into the format required by the nonbipartite matching Fortran code. The
#'original matrix should have dimensions \eqn{NxN}, where \eqn{N} is the total
#'number of elements to be matched.  The matrix may be created in R and input
#'into the distancematrix function.  Alternately, the matrix may be read in
#'from a CSV file, i.e. a text file where distances in a given row are
#'delimited by commas.  If a list element is given, it should have a data.frame
#'element named "dist", preferably generated by the gendistance function.
#'
#'\itemize{ \item The distancematrix function is used to reformat the input
#'distance matrix into the format required by the nonbipartite matching Fortran
#'code.
#'
#'\item If an extra column or row is present, it will be converted into row
#'names.  In other words, if the matrix has dimensions \eqn{(N+1)}x\eqn{N}, or
#'\eqn{N}x\eqn{(N+1)}, then the function will take the first row, or column, as
#'an ID column.  If both row and column names are present, i.e. a
#'\eqn{(N+1)}x\eqn{(N+1)} matrix, the function cannot identify the names.
#'
#'\item If an odd number of elements exist, a ghost element, or sink, will be
#'created whose distance is zero to all of the other elements.  For example,
#'when matching 17 elements, the function will create an 18th element that
#'matches every element perfectly.  This sink may or not be appropriate for
#'your application.  Naturally, you may create sinks as needed in the distance
#'matrix you input to the distancematrix function.
#'
#'\item The elements of distancematrix may not be re-assigned once created.  In
#'other words, you cannot edit the formatted distance matrix.  You need to edit
#'the matrix being input into the distancematrix function.  }
#'
#'@aliases distancematrix distancematrix-class distancematrix,matrix-method
#'distancematrix,character-method distancematrix,data.frame-method
#'distancematrix,list-method
#'@param x A matrix, data.frame, list or filename.  This should be an
#'\eqn{N}x\eqn{N} distance matrix for the \eqn{N} elements to be matched.  The
#'values in the diagonal are ignored because an element cannot be matched to
#'itself.  Using zeros in the diagonal is preferable, although other values are
#'acceptable provided they are not so large that they distort the scaling of
#'the other values.
#'@param \dots Additional arguments, potentially used when reading in a
#'filename and passed into read.csv.
#'@return distancematrix S4 object
#'@exportClass distancematrix
#'@exportMethod distancematrix
#'@rdname distancematrix
#'@name distancematrix
#'@usage distancematrix(x, ...)
#'@author Cole Beck
#'@seealso \code{\link{nonbimatch} \link{gendistance}}
#'@examples
#'
#'plainmatrix<-as.matrix(dist(sample(1:25, 8, replace=TRUE)))
#'diag(plainmatrix) <- 99999  # setting diagonal to an infinite distance for
#'                            # pedagogical reasons (the diagonal may be left
#'                            # as zero)
#'mdm<-distancematrix(plainmatrix)
#'df <- data.frame(id=LETTERS[1:25], val1=rnorm(25), val2=rnorm(25))
#'df[sample(seq_len(nrow(df)), ceiling(nrow(df)*0.1)), 2] <- NA
#'df.dist <- gendistance(df, idcol=1, ndiscard=2)
#'mdm2 <- distancematrix(df.dist)
#'

setClass("distancematrix", contains="matrix")
setMethod("initialize", "distancematrix", function(.Object, ...) {
    .Object <- callNextMethod()
    nr <- nrow(.Object)
    nc <- ncol(.Object)
    if(nr == 0 || nc == 0) {
        stop("Row and column lengths must exceed zero")
    }
    if(abs(nr-nc) > 1)
        stop("Row and column lengths must be equal")
    if(nr != nc) {
        if(nr > nc) {
            mynames <- .Object[1,]
            .Object@.Data <- .Object@.Data[-1,]
            nr <- nr-1
        } else {
            mynames <- .Object[,1]
            .Object@.Data <- .Object@.Data[,-1]
            nc <- nc-1
        }
        colnames(.Object) <- mynames
        rownames(.Object) <- mynames
    }
    if(any(is.na(as.numeric(.Object)))) {
        stop("Elements of a distance matrix must be numeric")
    }
    # if no rownames, default to row number
    if(is.null(dimnames(.Object))) {
      mynames <- as.character(seq(nr))
      colnames(.Object) <- mynames
      rownames(.Object) <- mynames
    }
    if(nr %% 2 == 1) {
        warning("There must be an even number of elements\nAdding a ghost value")
        .Object@.Data <- rbind(.Object@.Data, rep(0, nc))
        nr <- nr+1
        .Object@.Data <- cbind(.Object@.Data, rep(0, nr))
        nc <- nc+1
        colnames(.Object)[nr] <- 'ghost'
        rownames(.Object)[nc] <- 'ghost'
    }
    # set the diagonal to zero
    diag(.Object@.Data) <- 0
    if(!isTRUE(all.equal(unname(.Object@.Data), unname(t(.Object@.Data))))) {
        stop("A distancematrix must be symmetric")
    }
    .Object
})
setGeneric("distancematrix", function(x, ...) standardGeneric("distancematrix"))
setMethod("distancematrix", "matrix", function(x, ...) {
    new('distancematrix', x, ...)
})
setMethod("distancematrix", "character", function(x, ...) {
    if(file.access(x) == -1) {
        stop("File not accessible")
    }
    if(length(grep("[.]csv$", x, ignore.case=TRUE)) != 1) {
        stop("Function expects a CSV file")
    }
    distancematrix(read.csv(x, ...))
})
setMethod("distancematrix", "data.frame", function(x, ...) {
    distancematrix(as.matrix(x), ...)
})
setMethod("distancematrix", "list", function(x, ...) {
    if("dist" %in% names(x)) {
        distancematrix(x$dist, ...)
    } else {
        stop("x should contain dist element")
    }
})
setMethod("[<-", "distancematrix", function(x, i, j, value) { stop("You may not re-assign elements of a distancematrix") })
setMethod("[[<-", "distancematrix", function(x, i, j, value) { stop("You may not re-assign elements of a distancematrix") })
couthcommander/nbpMatching documentation built on Aug. 19, 2023, 10:02 p.m.