R/dummyVar.R

contr.ltfr <- function (n, contrasts = TRUE, sparse = FALSE) 
{
  if (is.numeric(n) && length(n) == 1L) {
    if (n > 1L) 
      levels <- as.character(seq_len(n))
    else stop("not enough degrees of freedom to define contrasts")
  }
  else {
    levels <- as.character(n)
    n <- length(n)
  }
  contr <- .RDiag(levels, sparse = sparse)
  if (contrasts) {
    if (n < 2L) stop(gettextf("contrasts not defined for %d degrees of freedom", n - 1L), domain = NA)
  }
  contr
}

contr.dummy <- function(n, ...)
  {
    if (is.numeric(n) && length(n) == 1L) {
      if (n > 1L) 
        levels <- as.character(seq_len(n))
      else stop("not enough degrees of freedom to define contrasts")
    }
    else {
      levels <- as.character(n)
      n <- length(n)
    }
    out <- diag(n)
    rownames(out) <- levels
    colnames(out) <- levels
    out
  }


"dummyVars" <-
  function(formula, ...){
    UseMethod("dummyVars")
  }
dummyVars.default <- function (formula, data, sep = ".", levelsOnly = FALSE, fullRank = FALSE, ...) 
{
  formula <- as.formula(formula)
  if(!is.data.frame(data)) data <- as.data.frame(data)

  vars <- all.vars(formula)
  if(any(vars == "."))
    {
      vars <- vars[vars != "."]
      vars <- unique(c(vars, colnames(data)))
    }
  isFac <- unlist(lapply(data[,vars,drop = FALSE], is.factor))
  if(sum(isFac) > 0)
    {
      facVars <- vars[isFac] 
      lvls <- lapply(data[,facVars,drop = FALSE], levels)
      if(levelsOnly)
      {
        tabs <- table(unlist(lvls))
        if(any(tabs > 1))
        {
          stop(paste("You requested `levelsOnly = TRUE` but",
                     "the following levels are not unique",
                     "across predictors:",
                     paste(names(tabs)[tabs > 1], collapse = ", ")))
        }
      }
    } else {
      facVars <- NULL
      lvls <- NULL
    }
  trms <- attr(model.frame(formula, data), "terms")
  out <- list(call = match.call(),
              form = formula,
              vars = vars,
              facVars = facVars,
              lvls = lvls,
              sep = sep,
              terms = trms,
              levelsOnly = levelsOnly,
              fullRank = fullRank)
  class(out) <- "dummyVars"
  out

}


print.dummyVars <- function(x, ...)
  {
    cat("Dummy Variable Object\n\n")
    cat("Formula: ")
    print(x$form)
    cat(length(x$vars),  " variables, ", length(x$facVars), " factors\n", sep = "")
    if(!is.null(x$sep) & !x$levelsOnly) cat("Variables and levels will be separated by '",
                                            x$sep, "'\n", sep = "")
    if(x$levelsOnly) cat("Factor variable names will be removed\n")
    if(x$fullRank) cat("A full rank encoding is used") else cat("A less than full rank encoding is used") 
    cat("\n")
    invisible(x)
  }


predict.dummyVars <- function(object, newdata, na.action = na.pass, ...)
  {
    if(is.null(newdata)) stop("newdata must be supplied")
    if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata)
    if(!all(object$vars %in% names(newdata))) stop(
                                                   paste("Variable(s)",
                                                         paste("'", object$vars[object$vars %in% names(newdata)],
                                                               "'", sep = "",
                                                               collapse = ", "),
                                                         "are not in newdata"))
    Terms <- object$terms
    Terms <- delete.response(Terms)
    if(!object$fullRank)
    {
      oldContr <- options("contrasts")$contrasts
      newContr <- oldContr
      newContr["unordered"] <- "contr.ltfr"
      options(contrasts = newContr)
    }
    m <- model.frame(Terms, newdata, na.action = na.action, xlev = object$xlevels)

    x <- model.matrix(Terms, m)
    if(!object$fullRank) options(contrasts = oldContr)

    if(object$levelsOnly)
      {
        for(i in object$facVars)
          {
            colnames(x) <- gsub(paste("^", i, sep = ""), "", colnames(x))
            colnames(x) <- gsub(paste(":", i, sep = ""), ":", colnames(x), fixed = TRUE)
          }
      }
    if(!is.null(object$sep) & !object$levelsOnly)
      {
        for(i in object$facVars)
          {
            colnames(x) <- gsub(paste("^", i, sep = ""), paste(i, object$sep, sep = ""), colnames(x))
            colnames(x) <- gsub(paste(":", i, sep = ""), paste(":", i, object$sep, sep = ""), colnames(x), fixed = TRUE)
          }
      }  
    x[, colnames(x) != "(Intercept)", drop = FALSE]
  }

Try the caret package in your browser

Any scripts or data that you put into this service are public.

caret documentation built on May 2, 2019, 5:47 p.m.