R/step.plr.R
In stepPlr: L2 Penalized Logistic Regression with Stepwise Variable Selection

step.plr <- function(x, y, weights = rep(1, length(y)), fix.subset = NULL,
                     level = NULL, lambda = 1e-4, cp = 'bic', max.terms = 5,
                     type = c('both', 'forward', 'forward.stagewise'),
                     trace = FALSE)
  {
    this.call <- match.call()
    type <- match.arg(type)
    eps <- 1e-5
    nobs <- nrow(x)
    p <- ncol(x)
    if (is.null(dimnames(x)[[2]])) {
      dimnames(x)[[2]] <- xnames <- paste('x', seq(p), sep='')
    } else {
      xnames <- dimnames(x)[[2]]
    }
    if (cp == 'bic') {
      cp <- log(nobs)
    } else if (cp == 'aic') {
      cp <- 2
    }
    max.terms <- min(max.terms, 2^p - 1)
    nfix <- length(fix.subset)
    if (nfix > 0)
      names(fix.subset) <- xnames[fix.subset]
    if (max.terms <= nfix)
      stop('max.terms <= length(fix.subset): Increase max.terms\n')
    action <- action.name <- bestfit <- vector('list')
    dev <- df <- score <- rep(Inf, max.terms + 1)
    group <- vector('list', length = 2)
    ix <- NULL
    if (!is.numeric(x) || !is.null(level)) {
      imat <- get.imat(x, level)
      IX <- imat$imat
      grp <- imat$group
      level <- imat$level
    } else {
      IX <- x
      grp <- seq(p)
    }
    if (trace)
      cat('*** forward addition ***\n')
    ncolx <- 0
    term1 <- seq(max(grp))
    m <- 0
    offset.subset <- offset.coefficients <- NULL
    bestfit[[m + 1]] <- fit <- plr(rep(1,nobs), y, weights, offset.subset,
                                   offset.coefficients, lambda, cp)
    dev[m + 1] <- fit$deviance
    df[m + 1] <- fit$df
    score[m + 1] <- fit$score
    while (m < max.terms) {
      m <- m + 1
      add1 <- FALSE
      if (m > 1 && type == 'forward.stagewise') {
        offset.subset <- seq(ncol(ix))
        offset.coefficients <- bestfit[[m]]$coefficients[-1]
      }
      if (length(term1) > 0) {
        for (term in term1) {
          ix0 <- cbind(ix, IX[ , grp == term, drop = FALSE])
          fit <- plr(ix0, y, weights, offset.subset,
                     offset.coefficients, lambda, cp)
          if (term %in% fix.subset || fit$score < score[m + 1]) {
            bestfit[[m + 1]] <- fit
            ix1 <- ix0
            dev[m + 1] <- fit$dev
            df[m + 1] <- fit$df
            score[m + 1] <- fit$score
            group[[2]] <- c(group[[1]], rep(m, ncol(ix1) - ncolx))
            action[[m]] <- -term
            action.name[[m]] <- xnames[term]
            add1 <- TRUE
            addterm1 <- term
          }
          if (term %in% fix.subset)
            break
          if (m > 1) {
            for (i in 1:(m - 1)) {
              if (!term.match(xnames[term], action.name[[i]],
                              action.name, m - 1)) {
                ix0 <- cbind(ix, cross.imat(IX[ , grp == term, drop = FALSE],
                                            ix[ , group[[1]] == i,
                                               drop = FALSE]))
                fit <- plr(ix0, y, weights, offset.subset,
                           offset.coefficients, lambda, cp)
                if (fit$score < score[m + 1]) {
                  bestfit[[m + 1]] <- fit
                  ix1 <- ix0
                  dev[m + 1] <- fit$dev
                  df[m + 1] <- fit$df
                  score[m + 1] <- fit$score
                  group[[2]] <- c(group[[1]], rep(m, ncol(ix1) - ncolx))
                  action[[m]] <- c(-term, i)
                  action.name[[m]] <- c(xnames[term], action.name[[i]])
                  add1 <- FALSE
                }
              }
            }
          }
        }
      }
      if (m > (nfix + 2)) {
        for (i in (nfix + 1):(m - 2)) {
          for (ii in (i + 1):(m - 1)) {
            if (length(action.name[[i]]) == 1 ||
                length(action.name[[ii]]) == 1) {
              if (!term.match(action.name[[i]], action.name[[ii]],
                              action.name, m - 1)) {
                ix0 <- cross.imat(ix[ , group[[1]] == i, drop = FALSE],
                                  ix[ , group[[1]] == ii, drop = FALSE])
                ix0 <- cbind(ix, ix0)
                fit <- plr(ix0, y, weights, offset.subset,
                           offset.coefficients, lambda, cp)
                if (fit$score < score[m + 1]) {
                  bestfit[[m + 1]] <- fit
                  ix1 <- ix0
                  dev[m + 1] <- fit$dev
                  df[m + 1] <- fit$df
                  score[m + 1] <- fit$score
                  group[[2]] <- c(group[[1]], rep(m, ncol(ix1) - ncolx))
                  action[[m]] <- c(i, ii)
                  action.name[[m]] <- c(action.name[[i]], action.name[[ii]])
                  add1 <- FALSE
                }
              }
            }
          }
        }
      }
      if (add1)
        term1 <- term1[term1 != addterm1]
      if (trace) {
        for (i in 1:length(action.name[[m]])) {
          cat(action.name[[m]][i], ' ')
        }
        cat('added\t', 'df=', df[m + 1], '\tscore=', score[m + 1], '\n')
      }
      ix <- ix1
      ncolx <- length(group[[2]])
      group[[1]] <- group[[2]]
    }
    if (type != 'both') {
      if (nfix > 0) {
        min.score <- min(score[-seq(nfix)])
        M <- min(which(score[-seq(nfix)] <= min.score + eps))
        M <- M + nfix
      } else {
        min.score <- min(score)
        M <- min(which(score <= min.score + eps))
      }
      if (M == 1) {
        action <- action.name <- group <- NULL
      } else {
        action <- action[1:(M - 1)]
        action.name <- action.name[1:(M - 1)]
        group <- group[[1]]
        group <- group[group < M]
      }
      object <- list(fit = bestfit[[M]], action = action,
                     action.name = action.name, deviance = dev[1:M],
                     df = nobs - df[1:M], score = score[1:M], group = group,
                     y = y, weights = weights, fix.subset = fix.subset,
                     level = level, lambda = lambda, cp = cp, type = type,
                     xnames = xnames, call = this.call)
      class(object) <- 'stepplr'
      return(object)
    }
    action.f <- action
    action.name.f <- action.name
    action.b <- rep(0, m)
    dev <- c(dev[1], rep(0, m - 1), dev[m + 1])
    df <- c(df[1], rep(0, m - 1), df[m + 1])
    score <- c(score[1], rep(Inf, m - 1), score[m + 1])
    back.i <- seq(m)
    if (trace)
      cat('\n*** backward deletion ***\n')
    while (m > 1) {
      m0 <- ifelse(m > nfix, nfix + 1, 1)
      for (i in m0:m) {
        delete <- TRUE
        for (ii in m0:m) {
          if (i!=ii &&
              all(action.name.f[[back.i[i]]] %in% action.name.f[[back.i[ii]]]))
            delete <- FALSE
        }
        if (delete) {
          ix0 <- ix[ , group[[1]] != back.i[i]]
          fit <- plr(ix0, y, weights, NULL, NULL, lambda, cp)
          if (fit$score < score[m]) {
            bestfit[[m]] <- fit
            ix1 <- ix0
            dev[m] <- fit$dev
            df[m] <- fit$df
            score[m] <- fit$score
            group[[2]] <- group[[1]][group[[1]] != back.i[i]]
            action.b[m] <- delete.i <- back.i[i]
          }
        }
      }
      if (trace) {
        for (i in 1:length(action.name.f[[delete.i]])) {
          cat(action.name.f[[delete.i]][i], ' ')
        }
        cat('deleted\t','df=', df[m], '\tscore=', score[m], '\n')
      }
      ix <- ix1
      group[[1]] <- group[[2]]
      back.i <- back.i[back.i != delete.i]
      m <- m - 1
    }
    action.b[1] <- back.i
    if (trace)
      cat(action.name.f[[back.i]], ' deleted\t', 'df=',df[1],
          '\tscore=', score[1], '\n')
    if (nfix > 0) {
      min.score <- min(score[-seq(nfix)])
      M <- min(which(score[-seq(nfix)] <= min.score + eps))
      M <- M + nfix
    } else {
      min.score <- min(score)
      M <- min(which(score <= min.score + eps))
    }
    if (M == 1) {
      action <- action.name <- group <- NULL
    } else {
      action <- action.name <- vector('list', length = M - 1)
      group <- NULL
      nlevel <- rep(0, p)
      for (i in 1:p) nlevel[i] <- max(1, length(level[[i]]))
      for (i in 1:(M - 1)) {
        act0 <- action.f[[action.b[i]]]
        act1 <- NULL
        s <- 0
        for (a in act0) {
          if (a < 0) {
            act1 <- c(act1, a)
            s <- s + nlevel[-a]
          }
          else {
            ii <- which(action.b == a)
            act1 <- c(act1, ii)
            if (s == 0) {
              s <- sum(group == ii)
            } else {
              s <- s*sum(group == ii)
            }
          }
        }
        action[[i]] <- act1
        action.name[[i]] <- action.name.f[[action.b[i]]]
        group <- c(group, rep(i, s))
      }
    }
    object <- list(fit = bestfit[[M]], action = action,
                   action.name = action.name, deviance = dev[1:M],
                   df = nobs - df[1:M], score = score[1:M], group = group,
                   y = y, weights = weights, fix.subset = fix.subset,
                   level = level, lambda = lambda, cp = cp, type = type,
                   xnames = xnames, call = this.call)
    class(object) <- 'stepplr'
    return(object)
  }

predict.stepplr <- function(object, x = NULL, newx = NULL,
                            type = c('link', 'response', 'class'), ...)
  {
    type <- match.arg(type)
    if (is.null(newx)) {
      pred <- predict(object$fit, type=type, ...)
    } else {
      if (object$type == 'both') {
        if (is.null(x) || nrow(x) != length(object$y))
          stop('x in the training data must be provided')
        if (is.vector(newx))
          newx <- matrix(newx)
        if (is.null(object$action)) {
          ix.tr <- 1
          ix.new <- matrix(1, nrow(newx), 1)
        } else {
          ix.tr <- imat(object, x)
          ix.new <- imat(object, newx)
        }
        fit <- plr(ix.tr, object$y, object$weights,
                   lambda = object$lambda, cp = object$cp)
        pred <- predict(fit, ix.new, type, ...)
      } else {
        ix.new <- imat(object, newx)
        pred <- predict(object$fit, ix.new, type, ...)
      }
    }
    return(pred)
  }

cv.step.plr <- function(x, y, weights = rep(1, length(y)),
                        nfold = 5, folds = NULL, lambda = c(1e-4, 1e-2, 1),
                        cp = c('aic', 'bic'), cv.type=c('deviance', 'class'),
                        trace = TRUE, ...)
  {
    cv.type <- match.arg(cv.type)
    n <- length(y)
    m1 <- length(lambda)
    m2 <- length(cp)
    devresids <- binomial()$dev.resids
    if (is.null(folds)) {
      folds <- split(sample(seq(n)), rep(1:nfold, length = n))
    } else if (length(folds) != nfold) {
      stop('The number of folds must match nfold')
    }
    errorarr <- array(0, dim = c(m1, m2, nfold))
    for (i in 1:nfold) {
      omit <- folds[[i]]
      for (i1 in 1:m1) {
        for (i2 in 1:m2) {
          fit <- step.plr(x[-omit, ], y[-omit], weights[-omit],
                          lambda=lambda[i1], cp=cp[i2], trace=TRUE, ...)
          if (cv.type == 'deviance') {
            mu <- predict(fit, x[-omit, ], x[omit, ], 'response')
            pred <- devresids(y[omit], mu, weights[omit])
            errorarr[i1, i2, i] <- sum(pred)
          } else {
            pred <- predict(fit, x[-omit, ], x[omit, ], 'class')
            errorarr[i1, i2, i] <- mean(pred != y[omit])
          }
        }
      }
      if (trace)
        cat('CV fold', i, '\n')
    }
    error <- apply(errorarr, c(1,2), mean)
    se.error <- sqrt(apply(errorarr, c(1, 2), var) / nfold)
    dimnames(error) <- dimnames(se.error) <- list(lambda, cp)
    return(list(error = error, se.error = se.error,
                lambda = lambda, cp = cp, folds = folds))
  }

anova.stepplr <- function(object, ...)
  {
    cat('\nAnalysis of Deviance Table\n\n')
    if (object$type == 'both') {
      cat('Type: Forward stepwise selection\n\n')
    } else {
      cat('Type: Forward selection\n\n')
    }
    action <- object$action.name
    m <- length(action)
    if (m > 0) {
      for (i in 1:m) {
        k <- length(action[[i]])
        if (k > 1) {
          a <- action[[i]][1]
          for (j in 2:k) {
            a <- paste(a, ':', action[[i]][j], sep = '')
          }
          action[[i]] <- a
        }
      }
      df <- round(object$df, digits=2)
      dev <- round(object$dev, digits=2)
      ddf <- c(NA, df[1:m] - df[2:(m+1)])
      ddev <- c(NA, dev[1:m] - dev[2:(m+1)])
      junk <- cbind(ddf, ddev, df, dev)
    } else {
      junk <- cbind(NA, NA, object$df, object$dev)
    }
    dimnames(junk) <- list(c('Intercept', action),
                           c('Df', 'Deviance', 'Resid.Df', 'Resid.Dev'))
    print(junk)
    return(invisible(junk))
  }

summary.stepplr <- function(object, ...)
  {
    return(invisible(summary(object$fit)))
  }

print.stepplr <- function(x, ...)
  {
    if (x$type == 'both') {
      cat('\nType: Forward stepwise selection\n\n')
    } else {
      cat('\nType: Forward selection\n\n')
    }
    action <- x$action.name
    m <- length(action)
    if (m > 0) {
      for (i in 1:m) {
        k <- length(action[[i]])
        if (k > 1) {
          a <- action[[i]][1]
          for (j in 2:k) {
            a <- paste(a,':', action[[i]][j], sep = '')
          }
          action[[i]] <- a
        }
      }
      cat('Variables with nonzero coefficients:\n')
      for (i in 1:m) {
        cat(action[[i]],'\n')
      }
    } else {
      cat('Null model selected\n')
    }
    fit <- x$fit
    cat('\n    Null deviance:', round(fit$null.deviance, digits = 2),
        'on', fit$nobs - 1, 'degrees of freedom\n')
    cat('Residual deviance:', round(fit$deviance, digits = 2),
        'on', round(fit$nobs - fit$df, digits = 2), 'degrees of freedom\n')
    cat('            Score: deviance +', round(fit$cp, digits = 1),
        '* df =', round(fit$score, digits = 2), '\n')
  }
Any scripts or data that you put into this service are public.
stepPlr documentation built on May 2, 2019, 3:08 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
stepPlr
L2 Penalized Logistic Regression with Stepwise Variable Selection

R/step.plr.R
In stepPlr: L2 Penalized Logistic Regression with Stepwise Variable Selection

Try the stepPlr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stepPlr L2 Penalized Logistic Regression with Stepwise Variable Selection

R/step.plr.R In stepPlr: L2 Penalized Logistic Regression with Stepwise Variable Selection

Try the stepPlr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stepPlr
L2 Penalized Logistic Regression with Stepwise Variable Selection

R/step.plr.R
In stepPlr: L2 Penalized Logistic Regression with Stepwise Variable Selection