R/form_models.R

Defines functions build_models build_study

Documented in build_models build_study

#' Formulates the experimental models
#'
#' \code{build_study} generates the full and null models for users unfamiliar
#' with building models in R. There are two types of experimental designs:
#' static and time-course. For more details, refer to the vignette.
#'
#' @param data \code{matrix}: gene expression data (rows are genes, columns are
#'   samples).
#' @param sampling \code{string}: type of study. Either "static" or 
#'   "timecourse". Default is "static".
#' @param grp \code{vector}: group assignement in the study (for K-class 
#'   studies). Optional.
#' @param tme \code{vector}: time variable in a time course study. Optional.
#' @param ind \code{factor}: individual factor for repeated observations of the
#'   same individuals. Optional.
#' @param bio.var \code{matrix}: biological variables. Optional.
#' @param basis.df \code{numeric}: degrees of freedom of the basis for time 
#'   course study. Default is 2.
#' @param basis.type \code{string}: either "ncs" (natural cubic spline) or "ps"
#'   (polynomial spline) basis for time course study. Default is "ncs".
#' @param adj.var \code{matrix}: adjustment variables. Optional.
#'
#' @return \code{\linkS4class{deSet}} object
#'
#' @examples
#' # create ExpressionSet object from kidney dataset
#' library(splines)
#' data(kidney)
#' age <- kidney$age
#' sex <- kidney$sex
#' kidexpr <- kidney$kidexpr
#'
#' # create deSet object from data
#' de_obj <- build_study(data = kidexpr, adj.var = sex, tme = age,
#' sampling = "timecourse", basis.df = 4)
#' @seealso \code{\linkS4class{deSet}}, \code{\link{build_models}}
#' @author John Storey, Andy Bass
#' @export
build_study = function(data, grp = NULL, adj.var = NULL, bio.var = NULL,
                     tme = NULL, ind = NULL,
                     sampling = c("static", "timecourse"), basis.df = 2,
                     basis.type = c("ncs", "poly")) {
  n <- ncol(data)
  m <- nrow(data)
  if (!is.matrix(data)) {
    stop("data must be a matrix")
  }
  if (!is.null(tme)) {
    if (is.matrix(tme) | is.vector(tme)) {
      tme <- data.frame(tme)
    } else {
      stop("tme must be a matrix")
    }
   # intercept <- !apply(tme, 2, var)
   # tme <- subset(tme, select=!intercept)
  }
  if (!is.null(adj.var)) {
    if (is.matrix(adj.var) | is.vector(adj.var) | is.factor(adj.var)) {
      adj.var <- data.frame(adj.var)
    } else {
      stop("adj.var must be a matrix")
    }
    #intercept <- !apply(adj.var, 2, var)
   # adj.var <- subset(adj.var, select=!intercept)
  }
  if (!is.null(bio.var)) {
#    sampling <- "notApplicable"
    if (is.matrix(bio.var)| is.vector(bio.var) | is.factor(bio.var)) {
      bio.var <- data.frame(bio.var)
    } else {
      stop("bio.var must be a matrix")
    }
    #intercept <- !apply(bio.var, 2, var)
   # bio.var <- subset(bio.var, select=!intercept)
    # Create models
    if (is.null(adj.var)) {
      pdat <- data.frame(bio.var)
      fmod <- paste("~", paste(names(pdat), collapse=" + "))
      nmod <- "~1"
    } else {
      pdat <- data.frame(adj.var, bio.var)
      fmod <- paste("~", paste(names(pdat), collapse=" + "))
      nmod <- paste("~", paste(names(adj.var), collapse=" + "))
    }
  } else {
    sampling <- match.arg(sampling, choices=c("static", "timecourse"))
    if (!is.null(grp)) {
      if (is.factor(grp)) {
        grp <- data.frame(grp = as.factor(grp))
      } else {
        stop("grp must be a factor")
      }
     # intercept <- !apply(grp, 2, var)
      #grp <- subset(grp, select=!intercept)
    } else {
      if(sampling == "static") {
        stop("grp variable cannot be missing for static sampling.")
      }
      grp <- data.frame(grp=rep(1,n))
    }
    g <- nrow(unique(grp))
    if (sampling == "static") {
      if (g==1) {
        stop("grp must have more than one unique value for static sampling.")
      }
      if (is.null(adj.var)) {
        pdat <- data.frame(grp)
        nmod <- "~1"
        fmod <- paste("~", paste(names(pdat), collapse=" + "))
      } else {
        pdat <- data.frame(adj.var, grp)
        fmod <- paste("~", paste(names(pdat), collapse=" + "))
        nmod <- paste("~", paste(names(adj.var), collapse=" + "))
      }
    }

    if (sampling == "timecourse") {
      basis.type <- match.arg(basis.type)
      varName <- colnames(data.frame(tme))
      if (length(varName) != 1) stop("Only one time variable is allowed. See ?deSet for information on how to create complicated models")
      if (basis.type == "ncs") {
        time.basis <- paste("ns(", varName,", df=", basis.df,", intercept=FALSE)", sep="")
      } else if (basis.type == "poly") {
        time.basis <- paste("bs(", varName,", df=", basis.df,", intercept=FALSE)", sep="")
      }
      if (g == 1) {
        # time course with no groups
        if (is.null(adj.var)) {
          pdat <- data.frame(tme)
          nmod <- "~1"
          fmod <- paste("~", time.basis)
        } else {
          pdat <- data.frame(adj.var, tme)
          fmod <- paste("~", paste(names(adj.var), collapse=" + "), "+", time.basis)
          nmod <- paste("~", paste(names(adj.var), collapse=" + "))
        }
      } else {
        if (is.null(adj.var)) {
          pdat <- data.frame(tme, grp)
        } else {
          pdat <- data.frame(tme, adj.var, grp)
        }
        # time course with groups
        nmod <- paste(paste("~", paste(names(pdat)[-1], collapse=" + ")), "+", time.basis)
        fmod <- paste(paste("~", paste(names(pdat)[-1], collapse=" + ")),"+",time.basis,"+", paste( "(", paste(names(pdat)[ncol(pdat)], collapse=" + ", sep=""), ")", ":", time.basis))  }
    }
  }
  rownames(pdat) <- colnames(data)
  exp_set <- ExpressionSet(as.matrix(data), AnnotatedDataFrame(pdat))
  edgeObj <- deSet(exp_set, full.model=as.formula(fmod),
                     null.model=as.formula(nmod), individual=ind)
  return(edgeObj)
}

#' Generate a deSet object with full and null models
#'
#' \code{build_models} creates a \code{\link{deSet}} object. The user inputs
#' the full and null models.
#'
#' @param data \code{matrix}: gene expression data.
#' @param cov \code{data.frame}: the covariates in the study.
#' @param full.model \code{formula}: the adjustment and the biological
#' variables of interest.
#' @param null.model \code{formula}: the adjustment variables.
#' @param ind \code{factor}: individuals sampled in the study. Default is
#' NULL. Optional.
#'
#' @return \code{\linkS4class{deSet}} object
#'
#' @examples
#' # create ExpressionSet object from kidney dataset
#' library(splines)
#' data(kidney)
#' age <- kidney$age
#' sex <- kidney$sex
#' kidexpr <- kidney$kidexpr
#' cov <- data.frame(sex = sex, age = age)
#'
#' # create models
#' null.model <- ~sex
#' full.model <- ~sex + ns(age, df=4)
#'
#' # create deSet object from data
#' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null.model,
#' full.model = full.model)
#' @seealso \code{\linkS4class{deSet}}, \code{\link{build_study}}
#' @author John Storey, Andy Bass
#' @export
build_models <- function(data, cov, full.model = NULL, null.model = NULL,
                      ind = NULL) {
  n <- ncol(data)
  m <- nrow(data)
  if (!is.matrix(data)) {
    stop("data must be a matrix")
  } else if (!is.data.frame(cov)) {
    stop("cov must be a data frame")
  } else if (is.null(full.model)) {
    stop("need an alternative model")
  }
  if (is.null(null.model)) {
    null.model <- ~1
  }
  if (!is(full.model, "formula") | !is(null.model, "formula")) {
    stop("alternative and null models must be formatted as a formula")
  }

  exp_set <- ExpressionSet(data, AnnotatedDataFrame(cov))
  edgeObj <- deSet(exp_set, full.model = full.model, null.model = null.model,
                   individual = ind)
  return(edgeObj)
}

Try the edge package in your browser

Any scripts or data that you put into this service are public.

edge documentation built on Nov. 8, 2020, 6:48 p.m.