R/pelora.R
In supclust: Supervised Clustering of Predictor Variables Such as Genes

Documented in coef.pelora fitted.pelora pelora plot.pelora predict.pelora print.pelora summary.pelora

## Pelora - a supervised algorithm for grouping predictor variables
pelora <- function(x, y, u = NULL, noc = 10, lambda = 1/32, flip = "pm",
                   standardize = TRUE, trace = 1)
  {
    ## Bedeutung der Input-Variablen
    ## -----------------------------
    ## x            Expressionmatrix im Format (n x p)
    ## y            Binärer Responsevektor, codiert mit 0 und 1
    ## u            Klinische Variablen im Format (n x m)
    ## noc          Anzahl Variablen, die ins Modell aufgenommen werden
    ## lambda       Reskalierter Penalty-Parameter, sollte in [0,1] sein
    ## flip         Sign-Flipping in der x-Matrix: ständig, einmalig, nicht
    ## standardize  Variablen-Standardisierung in der x-Matrix: ja oder nein?
    ## trace        Run-Control Output: 0 nichts, 1 moderat, 2 ausführlich


    ## Check input
    if(!is.matrix(x) || !is.numeric(x))
        stop("'x' must be a numeric matrix (e.g. gene expressions)")
    n  <- nrow(x)
    if(!is.numeric(y) || length(y) != n || any(y != 0 & y != 1))
        stop("'y' must be a numeric vector of length n = ", nrow(x),
             " with only 0/1 entries")
    yvals <- 0:1 # the y-values, aka "class labels"

    ## Sign-Flip
    if (flip=="pm")
      {
        x      <- cbind(x, -x)
        signs  <- c(rep(0,ncol(x)))
      }
    if (flip=="cor")
      {
        sgnChg <- sign.change(x,y)
        x      <- sgnChg$x
        signs  <- sgnChg$signs
      }
    if (flip=="none")
      {
        signs  <- NULL
      }

    ## Standardisierung der x-Variablen
    if (standardize)
      {
        stndz  <- standardize.genes(x)
        x      <- stndz$x
        ## means  <- stndz$means
        sdevs  <- stndz$sdevs
        if (any(sdevs==0))
          stop ("There are predictor variables with st. dev. = 0")
      }
    else
      {
        ## means <- NULL
        sdevs <- NULL
      }

    ## Setting up the clinical variables
    if(!is.null(u)) { ## 'have.u': with or without "clinical variables"?
        if(!is.matrix(u) || !is.numeric(u))
            stop("'u' must be a numeric matrix (e.g. clinical variables)")
        m <- ncol(u)
        E <- cbind(x, u)
    } else {
        m <- 0
        E <- x
    }

    ## Initialisierung
    g         <- ncol(E) # if flip %in% c("cor","none") = px + m
    p         <- noc+1
    px        <- ncol(x)
    X         <- matrix(0, n, p)
    X[,1]     <- 1
    P         <- matrix(0, p, p) ##= diag(apply(X,2,var)) * lambda*n
    theta0    <- log(mean(y)/(1-mean(y)))
    theta     <- c(theta0, rep(0, noc))
    prob      <- rep(1/(1 + exp(-theta0)), n)## == 1/(1 + exp(-X %*% theta))
    W         <- diag(prob*(1-prob))

    ## Earlier options that are now held fixed:
    penflag   <- 0 ## for the standard penalty, !=0 for the nonstandard penalty
    blockflag <- 1 ## for not allowing a gene to enter a cluster more than once
    critflag  <- 0 ## for the log-likelihood criterion, 1 for the L2 criterion
    valiflag  <- 1 ## for validation of genes via backdeletion, 0 to do without

    lSize     <- 2*g*p

    ## Aufruf der C-Funktion
    ## (worked faster with DUP=FALSE; TODO use .Call()!)
    res <- .C(R_clusterer,
              E = 	as.double(E),
              X = 	as.double(X),
              W = 	as.double(W),
              P = 	as.double(P),
              y = 	as.double(y),
              prob = 	as.double(prob),
              theta =   as.double(theta),
              lambda=   as.double(lambda*n),
              n = 	as.integer(n),
              g = 	as.integer(g),
              m = 	as.integer(m),
              p = 	as.integer(p),
              critflag = as.integer(critflag),
              penflag  = as.integer(penflag),
              blockflag= as.integer(blockflag),
              valiflag = as.integer(valiflag),
              traceflag= as.integer(trace),
              ## Output:
              genliste = 	integer(lSize),
              kriterium = 	double (lSize)
              )[c("genliste", "kriterium")]

    ## Auswertung, bilden der Liste mit Genen und Mittelwerten
    i         <- 1
    genliste  <- res$genliste
    kriterium <- res$kriterium
    genes     <- integer()
    cCrit     <- double()
    alle      <- list()
    crit      <- list()
    while (genliste[i] != 0) {

        if (genliste[i] == -1) {
            i <- i+1
            genes <- c(genes, genliste [i])
            cCrit <- c(cCrit, kriterium[i])
            i <- i+1
        }
        if (genliste[i] == -2) {
            kickout <- genliste[i+1]
            iKick <- which(genes == kickout)
            genes <- genes[-iKick]
            cCrit <- cCrit[-iKick]
            i <- i+2
        }
        if (genliste[i] == -4) { # cluster end
            alle    <- c(alle, list(genes))
            crit    <- c(crit, list(cCrit))
            genes <- integer()
            cCrit <- double()
            i <- i+1
        }
    }
    ## noclu <- length(alle)

    var.type <- factor(sapply(alle, function(j) any(j > px)),
                       levels = c(FALSE, TRUE),
                       labels = c("Cluster", "Clinical"))
    values <- sapply(alle, function(ids) rowMeans(E[, ids, drop=FALSE]))

    ## FIXME: The samp.names should be attached to 'values' / kept
    ## -----  where available;  values should get "predictor names" !!
    dnE <- dimnames(E)
    if(is.null(sNames <- dnE[[1]]))
        sNames <- names(y)
    if(is.null(sNames)) sNames <- as.character(1:n)

    ## Output
    out <- list(genes = alle, values = values, y = y, yvals = yvals,
                lambda = lambda, noc = noc, px = px, flip = flip,
                var.type = var.type, crit = crit, signs = signs,
                samp.names = sNames, gene.names = dnE[[2]], call = match.call())
    class(out) <- "pelora"
    out
  }


## Returns the fitted values
fitted.pelora <- function(object, ...)
  {
    ## Fitted values
    out <- object$values
    if (is.null(vNames <- colnames(out)))
      vNames      <- paste("Predictor", 1:object$noc)
    dimnames(out) <- list(object$samp.names, vNames)
    out
  }


## A short overview of what has been found by Pelora
print.pelora <- function(x, digits = getOption("digits"), details = FALSE, ...)
  {
    ## Preliminaries
    noc      <- x$noc
    fin.crit <- sapply(1:noc, function(i) {cr <- x$crit[[i]]; cr[length(cr)]})
    nGenes   <- unlist(lapply(x$genes,length))
    smDig    <- max(2, digits - 4)
    cCrit    <- format(round(fin.crit, smDig), nsmall = smDig, digits = digits)
    cG       <- format(nGenes)
    cI       <- format(1:noc)

    ## General overview
    cat("\nPelora called with lambda = ", x$lambda, ",", sep="")
    isClust <- x$var.type == "Cluster"
    if (allClust <- all(isClust)) ##  predictors are clusters only
      cat(" ", noc, " cluster", if(noc > 1)"s"," fitted\n\n", sep="")
    else ## predictors are both clusters *and* clinical variables
      cat("\n", (nC <- sum(isClust)), " cluster", if(nC>1)"s", " and ",
          noc - nC, " clinical variable", if ((noc-nC)>1)"s",
          " fitted\n\n", sep="")

    ## Information about each of the predictors
    for (i in 1:noc)
      {
	gic <- x$genes[[i]]
        hic <- x$genes[[i]]
        if(!is.null(x$signs) && any(x$signs == 0))
          hic[hic>(x$px/2)] <- hic[hic>(x$px/2)]-(x$px/2)
	if (isClust[i])
          {
	    ## the i-th predictor is a gene cluster
	    ng <- length(gic) # == nGenes[i]
	    if(allClust)
              cat("Cluster", cI[i], ": Contains ")
	    else
              cat("Predictor", cI[i], ": Cluster with ")
	    cat(cG[i], " gene", if(ng != 1)"s",
		", final criterion ", cCrit[i],"\n", sep="")

	    if(details)
              {
		## printing all the genes
		cj  <- format(1:ng)
		cgi <- format(hic)
		for (j in 1:ng)
                  {
                    cat("Entry", cj[j], ": Gene", cgi[j])
                    if (!is.null(x$signs))
                      {
                        if (x$signs[gic[j]] == 0)
                          {
                            cat(if (gic[j]>(x$px/2))
                                " (flipped)" else "          ")
                          }
                        else
                          {
                            cat(if (x$signs[gic[j]] == -1)
                                " (flipped)" else "          ")
                          }
                        if (!is.null(x$gene.names))
                          cat(" : Name", x$gene.names[gic[j]])
                        cat("\n")
                      }
                  }
              }
          }
	else
          { ## x$var.type[i] == "Clinical"
	    cat("Predictor ", cI[i], " : Clinical variable ",
                gic[1]-x$px,
		if (!is.null(x$gene.names))
		paste(" named '", x$gene.names[gic[1]], "'"),
		", final criterion ", cCrit[i], "\n", sep="")
          }
	if(details) cat("\n")
      } ## for(j )
    cat("\n")
    invisible(x)
  }

## Yields printed output about the clustering in more detail
summary.pelora <- function(object, digits = getOption("digits"), ...)
  {
    print(object, details = TRUE, digits = digits, ...)
  }


## Plots a 2-dimensional projection of the first 2 Pelora-clusters
plot.pelora <- function(x, main = "2-Dimensional Projection Pelora's output",
                        xlab = NULL, ylab = NULL, col = seq(x$yvals), ...)
  {
    if(x$noc <= 1)
      {
        if(is.null(xlab))
          xlab <- if (x$var.type[1] == "Cluster")
            "Mean Expression of Pelora's Cluster 1"
          else "Expression of a Clinical Variable"

        if(is.null(ylab))
          ylab <- "Class label"

        plot(x$values[,1], x$y, type="n", xlab=xlab, ylab=ylab, main=main, ...)
        text(x$values[,1], x$y, x$y, col = col[1 + x$y])
        return()
      }

    if(is.null(xlab))
    xlab <-
      if (x$var.type[1] == "Cluster")
        "Mean Expression of Pelora's Cluster 1"
      else "Expression of a Clinical Variable"

    if(is.null(ylab))
    ylab <-
        if (x$var.type[2] == "Cluster")
            "Mean Expression of Pelora's Cluster 2"
        else "Expression of a Clinical Variable"

    plot(x$values[,1], x$values[,2], type = "n",
         xlab = xlab, ylab = ylab, main = main, ...)
    text(x$values[,1], x$values[,2], x$y, col = col[1 + x$y])
    invisible()
}


## Returns the coefficients of the penalized logistic regression classifier
coef.pelora <- function(object, ...)
  {
    clusnames  <- character(object$noc)
    for (i in 1:object$noc) clusnames[i] <- paste("Predictor", i)
    out        <- ridge.coef(fitted(object), object$y, object$lambda)
    names(out) <- c("Intercept", clusnames)
    out
  }


## Predictions with Pelora's clusters
predict.pelora <- function(object, newdata = NULL, newclin = NULL,
                           type = c("fitted", "probs", "class"),
                           noc = object$noc, ...)
  {
    ## Checking the input
    type <- match.arg(type)
    stopifnot(length(noc) >= 1, length(object$noc) == 1, object$noc >= 1)
    if (max(noc) > object$noc)
      stop("You cannot predict with more predictors than you have fitted")


    if (is.null(newdata)) {
        ## Return fitted values, probabilities or class labels for the training data
        X   <- cbind(rep(1,length(object$y)),fitted(object))

        if (length(noc)==1) {
          if(noc==object$noc)
          {
            prvec           <- matrix(1/(1+exp(-c(X%*%coef(object)))), ncol = 1)
          } else { ## length(noc)==1 && noc < object$noc
            koeff <- ridge.coef(object$val[, 1:noc], object$y, object$lambda)
            prvec <- matrix(c((1/(1+exp(-(X[, 1:(noc+1)]%*%koeff))))), ncol = 1)
          }
            dimnames(prvec) <- list(object$samp.names, paste(noc, "Predictors"))
          ## return
          switch(type,
                 fitted = fitted(object),
                 probs  = prvec,
                 class  = (prvec > 0.5)*1)

        } else { ## (length(noc) > 1 && max(noc) <= object$noc)  {already checked}
            prmt <- NULL ## << FIXME
            for (i in 1:length(noc)) {
              koeff <- ridge.coef(object$val[,1:(noc[i])],object$y,object$lamb)
              ## TODO prmt[,i] <-
              prvec <- matrix(c(1/(1+exp(-(X[,1:(noc[i]+1)]%*%koeff)))), ncol=1,
                              dimnames = list(object$samp.n,paste(noc[i], "Predictors")))
              prmt  <- cbind(prmt, prvec)
            }
            ## return
            switch(type,
                   fitted = fitted(object)[, noc, drop = FALSE],
                   probs  = prmt,
                   class  = (prmt>0.5)*1)
        }
    } else {
        ## Returning fitted values, probabilities or class labels for 'newdata'

        ## Customizing newdata according to the choice of the flipping method
        if (object$flip=="pm") newdata <- cbind(newdata, -newdata)

        ## Check if new clinical variables are provided too
        if (is.null(newclin) && any(object$var.type=="Clinical"))
          stop("You also need to provide new clinical variables")

        ## Check the dimensions of the new data
        if (ncol(newdata)!=object$px)
          stop("The new data need to have the same number of ",
               "predictor variables as the training data")

        ## Flip the signs of the new data
        if (object$flip=="cor") newdata <- t(t(newdata)*object$signs)

        ## Standardization of the new data
        if (!is.null(object$means))
          {
            for (i in 1:(ncol(newdata)))
              {
                newdata[,i] <- (newdata[,i]-object$means[i])/object$sdevs[i]
              }
          }

        ## Merge expression data and clinical variables (if available)
        if (!is.null(newclin)) newdata <- cbind(newdata, newclin)

        ## Determine the fitted values
        Xt <- cbind(rep(1,nrow(newdata)))
        for (j in 1:object$noc) {
          Xt <- cbind(Xt, rowMeans(newdata[,object$genes[[j]], drop = FALSE]))
        }

        ## Naming the predictors
        sampnames <- 1:nrow(newdata)
        clusnames <- paste("Predictor", 1:object$noc)
        dimnames(Xt) <- list(sampnames, c("Intercept", clusnames))

        if (length(noc)==1) {
          eta <-
            if(noc == object$noc) {
              Xt %*% coef(object)
            } else { ## length(noc) == 1  &&  noc < object$noc
              koeff <- ridge.coef(object$val[,1:noc],object$y,object$lambda)
              Xt[,1:(noc+1)] %*% koeff
            }
          prvec <- matrix(1 /(1 + exp(- drop(eta))), ncol = 1,
                          dimnames = list(1:nrow(newdata), paste(noc, "Predictors")))
          switch(type,
                 fitted = Xt[,2:ncol(Xt)],
                 probs  = prvec,
                 class  = (prvec > 0.5)*1)
        }
        else { ##  (length(noc) > 1 && max(noc) <= object$noc)
          prmt <- NULL
          for (i in 1:length(noc)) {
            koeff <- ridge.coef(object$val[,1:(noc[i])],object$y,object$lamb)
            prvec <- matrix(1/ (1 + exp(-Xt[,1:(noc[i]+1)] %*% koeff)),
                            ncol = 1,
                            dimnames = list(1:nrow(newdata),paste(noc[i],"Predictors")))
            prmt  <- cbind(prmt, prvec)
          }
          switch(type,
                 fitted = Xt[,2:ncol(Xt)],
                 probs  = prmt,
                 class  = (prmt>0.5)*1)
        }
      }
  }
Any scripts or data that you put into this service are public.
supclust documentation built on Sept. 27, 2021, 5:11 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
supclust
Supervised Clustering of Predictor Variables Such as Genes

R/pelora.R
In supclust: Supervised Clustering of Predictor Variables Such as Genes

Defines functions predict.pelora coef.pelora plot.pelora summary.pelora print.pelora fitted.pelora pelora

Documented in coef.pelora fitted.pelora pelora plot.pelora predict.pelora print.pelora summary.pelora

Try the supclust package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

supclust Supervised Clustering of Predictor Variables Such as Genes

R/pelora.R In supclust: Supervised Clustering of Predictor Variables Such as Genes

Defines functions predict.pelora coef.pelora plot.pelora summary.pelora print.pelora fitted.pelora pelora

Documented in coef.pelora fitted.pelora pelora plot.pelora predict.pelora print.pelora summary.pelora

Try the supclust package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

supclust
Supervised Clustering of Predictor Variables Such as Genes

R/pelora.R
In supclust: Supervised Clustering of Predictor Variables Such as Genes