R/library.R

Defines functions md.censor md.exp md.eval md.data md.death md.constant md.sample md.mvnorm md.norm md.binom md.uniform md.sex md.simparams md.simulate

Documented in md.binom md.censor md.constant md.data md.death md.eval md.exp md.mvnorm md.norm md.sample md.sex md.simparams md.simulate md.uniform

#' md.simdataobject is an abstract class that is inherited by the 
#' md.simdata covariate generating classes such as md.binom, md.norm etc.
#' 
#' @keywords internal 
#' 
#' @slot name name of the covariate
#' @slot center expected mean of the covariate used to center values in md.exp
#' @slot virtual specifies whether the covariate will be included in the final dataset generated by md.simulate
#' @name md.simdataobject-class
#' @rdname md.simdataobject
#' @exportClass md.simdataobject
setClass("md.simdataobject", representation(
    name = 'character',
    center = 'numeric',
    virtual = 'logical'
))
#' Constructor of the md.simdataobject
#' 
#' @keywords internal 
#' 
#' @name md.simdataobject-init
#' @rdname md.simdataobject
#' @usage aaa
setMethod("initialize", "md.simdataobject", function(.Object, x){ 
  .Object@name = x
  .Object@center = Inf
  .Object@virtual = FALSE
  .Object
})
setGeneric("md.generate", def=function(.Object, .data, .param){standardGeneric("md.generate")})
setClass("md.binom", representation(
  prob = "numeric"
), contains = "md.simdataobject")
setMethod("initialize", "md.binom", function(.Object, x, y){ 
  .Object = callNextMethod(.Object, x)
  .Object@prob = y
  .Object@center = y
  .Object
})
setMethod("md.generate", "md.binom", function(.Object, .data, .param){ 
  data = data.frame(rbinom(dim(.data)[1], 1, .Object@center))
  names(data) = c(.Object@name)
  data
})
setClass("md.sex", representation(
  maleperc = "numeric",
  asfactor = "logical"
), contains = "md.simdataobject")
setMethod("initialize", "md.sex", function(.Object, x, y, z){ 
  .Object = callNextMethod(.Object, x)
  .Object@maleperc = y
  .Object@asfactor = z
  .Object@center = 2 - .Object@maleperc
  .Object
})
setMethod("md.generate", "md.sex", function(.Object, .data, .param){ 
  values = 1 + rbinom(dim(.data)[1], 1, 1-.Object@maleperc)
  if (.Object@asfactor)
  {
    values = factor(values)
    levels(values) = c("male", 'female')
  }
  data = data.frame(values)
  names(data) = c(.Object@name)
  data
})
setClass("md.sample", representation(
  values = "numeric",
  dates = "Date",
  weights = "numeric"
), contains = "md.simdataobject")
setMethod("initialize", "md.sample", function(.Object, x, y, z){ 
  .Object = callNextMethod(.Object, x)
  
  .Object@weights = rep(1, length(y))
  if (!is.null(z))
    .Object@weights = z

  if (is(y, "Date"))
  {
    .Object@dates = y
    .Object@values = Inf
    .Object@center = sum(.Object@weights * as.numeric(.Object@dates)/sum(.Object@weights))
  }
  else
  {
    .Object@values = y
    .Object@center = sum(.Object@weights * .Object@values)/sum(.Object@weights)
  }
  
  .Object
})
setMethod("md.generate", "md.sample", function(.Object, .data, .param){ 
  if (.Object@values[1] != Inf)
    data = data.frame(sample(.Object@values, dim(.data)[1], replace=TRUE, .Object@weights))
  else
    data = data.frame(sample(.Object@dates, dim(.data)[1], replace=TRUE, .Object@weights))
  names(data) = c(.Object@name)
  data
})
setClass("md.uniform", representation(
  min = "numeric",
  max = "numeric",
  minDate = "Date",
  maxDate = "Date"
), contains = "md.simdataobject")
setMethod("initialize", "md.uniform", function(.Object, x, y, z){ 
  .Object = callNextMethod(.Object, x)
  
  if (is(y, "Date"))
  {
    .Object@min = Inf
    .Object@max = Inf
    .Object@minDate = y
    if (is(z, "numeric"))
      .Object@maxDate = y + z
    else
      .Object@maxDate = z
    .Object@center = as.numeric(.Object@minDate)+as.numeric(.Object@maxDate)/2
  }
  else
  {
    .Object@min = y
    .Object@max = z
    .Object@center = (.Object@min + .Object@max) / 2
  }
  
  .Object
})
setMethod("md.generate", "md.uniform", function(.Object, .data, .param){ 
  if (.Object@min == Inf)
    data = data.frame(as.Date(runif(dim(.data)[1], .Object@minDate, .Object@maxDate), origin="1970-01-01"))
  else
    data = data.frame(runif(dim(.data)[1], min=.Object@min, max=.Object@max))
  
  names(data) = c(.Object@name)
  data
})
setClass("md.norm", representation(
  sd = "numeric"
), contains = "md.simdataobject")
setMethod("initialize", "md.norm", function(.Object, x, y, z){ 
  .Object = callNextMethod(.Object, x)
  .Object@center = y
  .Object@sd = z
  .Object
})
setMethod("md.generate", "md.norm", function(.Object, .data, .param){ 
  data = data.frame(rnorm(dim(.data)[1], mean=.Object@center, sd=.Object@sd))
  names(data) = c(.Object@name)
  data
})
setClass("md.mvnorm", representation(
  names = "character",
  means = "numeric",
  cov = "matrix"
), contains = "md.simdataobject")
setMethod("initialize", "md.mvnorm", function(.Object, x, y, z){ 
  .Object = callNextMethod(.Object, "")
  .Object@names = x
  .Object@means = y
  .Object@cov = z
  .Object
})
setMethod("md.generate", "md.mvnorm", function(.Object, .data, .param){ 
  data = data.frame(mvrnorm(dim(.data)[1], .Object@means, .Object@cov))
  names(data) = .Object@names
  data
})
setClass("md.eval", representation(
   formula = "character"
), contains = "md.simdataobject")
setMethod("initialize", "md.eval", function(.Object, x, y, z, q){ 
  .Object = callNextMethod(.Object, x)
  .Object@formula = y
  .Object@center = z
  .Object@virtual = q
  .Object
})
setMethod("md.generate", "md.eval", function(.Object, .data, .param){ 
  expr = parse(text=.Object@formula)
  data = data.frame(eval(expr, .data))
  names(data) = c(.Object@name)
  data
})
setClass("md.data", representation(
  data = "data.frame",
  randomsample = "logical"
), contains = "md.simdataobject")
setMethod("initialize", "md.data", function(.Object, x, y){ 
  .Object = callNextMethod(.Object, "")
  .Object@data = x
  .Object@randomsample = y
  .Object
})
setMethod("md.generate", "md.data", function(.Object, .data, .param){ 
  data = .Object@data
  if (.Object@randomsample)
  {
    indexes = sample(1:dim(.Object@data)[1], dim(.data)[1], replace=TRUE)
  }
  else
  {    
    if (dim(.data)[1] == dim(.Object@data)[1])
      return (data)
    indexes = ((0:(dim(.data)[1]-1)) %% dim(.Object@data)[1]) + 1
  }
  data = data[indexes,]
  rownames(data) = 1:dim(data)[1]
  data
})
setOldClass("ratetable")
setClass("md.death", representation(
  poptable = "ratetable",
  sexcolname = "character",
  birthcolname = "character",
  startcolname = "character"
), contains = "md.simdataobject")
setMethod("initialize", "md.death", function(.Object, x, y, z, q, w){ 
  .Object = callNextMethod(.Object, x)
  .Object@poptable = y
  .Object@sexcolname = z
  .Object@birthcolname = q
  .Object@startcolname = w
  .Object
})
setMethod("md.generate", "md.death", function(.Object, .data, .param){ 
  data = runif(dim(.data)[1])
  year = as.numeric(.data[,which(colnames(.data)== .Object@birthcolname)] - as.Date("0-1-1"))/365.2425
  #year = as.numeric(format(.data[,which(colnames(.data)== .Object@birthcolname)], "%Y"))
  age = .data[,which(colnames(.data)== .Object@startcolname)] - .data[,which(colnames(.data)== .Object@birthcolname)]
  sex = .data[,which(colnames(.data)== .Object@sexcolname)]
  
  md.init(.Object@poptable)
  for (i in 1:length(data))
    data[i] = md.survtime(year[i], age[i], data[i], sex[i])
  
  data = .data[,which(colnames(.data)== .Object@startcolname)] + data
  data = data.frame(data)
  names(data) = c(.Object@name)
  data
})
setClass("md.exp", representation(
  startcolname = "character",
  startdate = "Date",
  columns = "character",
  betas = "numeric",
  lambda = "numeric"
), contains = "md.simdataobject")
setMethod("initialize", "md.exp", function(.Object, x, y, z, q, w){ 
  .Object = callNextMethod(.Object, x)
  if (is(y, "Date"))
    .Object@startdate = y
  else
    .Object@startcolname = y
  .Object@columns = z
  .Object@betas = q
  .Object@lambda = w
  .Object
})
setMethod("md.generate", "md.exp", function(.Object, .data, .param){ 
  lambda = array(dim = dim(.data)[1], .Object@lambda)
  beta = array(dim = dim(.data)[1], 0)

  for (i in 1:length(.Object@columns))
  {
    col = .data[,which(colnames(.data)== .Object@columns[i])]
    center = Inf
    
    for (j in 1:length(.param@params))
    {
      if (.param@params[[j]]@name == .Object@columns[i])
        center = .param@params[[j]]@center
      if (class(.param@params[[j]])[1] == "md.mvnorm")
        for (k in 1:length(.param@params[[j]]@names))
          if (.param@params[[j]]@names[k] == .Object@columns[i])
            center = .param@params[[j]]@means[k]
    }
    if(is.infinite(center))
      center = mean(col)
    
    beta = beta + (col - center) * .Object@betas[i]
  }
  lambda =  exp(beta) * lambda
  
  if (is(.Object@startdate, "Date"))
    startcol = data.frame(.Object@startdate + array(dim= dim(.data)[1], 0))
  else
    startcol = .data[,which(colnames(.data)== .Object@startcolname)]
  
  data = data.frame(startcol + rexp(length(lambda), lambda))
                      #+ rlnorm(length(lambda), 50*lambda*365, 5)) centering?

  names(data) = c(.Object@name)
  data
})
setClass("md.simdata", representation(
  params = 'list'
))
setMethod("initialize", "md.simdata", function(.Object){ 
  .Object
})
#' operator
#' 
#' @keywords internal 
#' 
#' @usage x
setMethod("+", c("md.simdata", "md.simdataobject"), function(e1, e2) {
  e1@params = c(e1@params, e2)
  e1
})






















#' md.simulate
#' 
#' Creates a simulated dataset using the provided simulation parameters.
#' 
#' @param sim a \code{\link{md.simparams}} object containg simulation parameters
#' @param N number of observations
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' ratetable = survexp.us
#' 
#' sim = md.simparams() +
#'           md.sex("sex", 1) + 
#'           md.uniform("Z1") +
#'           md.mvnorm(c("Z2", "Z3"), c(100, 0), matrix(c(225, 3, 2, 1), 2, 2)) +
#'           md.sample("Z4", c(1, 2, 3, 4), c(0.25, 0.25, 0.25, 0.25)) +
#'           md.uniform("birth", as.Date("1930-1-1"), as.Date("1970-1-1")) +
#'           md.constant("start", as.Date("1990-1-1")) +
#'           md.death("death", ratetable, "sex", "birth", "start") +
#'           md.eval("age", "as.numeric(start - birth)/365.2425", 80, FALSE) + 
#'           md.exp("event", "start", c("age", "sex", "Z1", "Z2"), 
#'              c(0.1, 2, 1, 0.01), 0.0001)
#'           
#' data = md.simulate(sim, 1000)
#' }
#' @export md.simulate
md.simulate = function(sim, N){
  if (class(sim)[1] == "md.simdata")
  {
    data = data.frame(1:N)
    names(data) = c("row")
    for (i in 1:length(sim@params))
      data = cbind(data, md.generate(sim@params[[i]], data, sim))
    
    data = within(data, rm("row"))
    for (i in 1:length(sim@params))
      if (sim@params[[i]]@virtual == TRUE)
        data = data[, ! names(data) %in% sim@params[[i]]@name, drop = TRUE]
    
    data
  }
}


#' md.simparams
#' 
#' Constructs an md.simparams object that holds the parameters required to generate the simulated data set. 
#' The parameters specifying covariates and event time variables are appended to the md.simparams by 
#' adding the appropriate function call
#' 
#' @seealso \code{\link{md.constant}}, \code{\link{md.uniform}}, \code{\link{md.binom}}, \code{\link{md.norm}}, \code{\link{md.mvnorm}}, \code{\link{md.sex}}, \code{\link{md.exp}}, \code{\link{md.death}}
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'    md.sex("sex", 0.5) + 
#'    md.uniform("birth", as.Date("1930-1-1"), as.Date("1970-1-1")) +
#'    md.uniform("start", as.Date("2005-1-1"), as.Date("2010-1-1")) +
#'    md.death("death", survexp.us, "sex", "birth", "start") 
#' }
#' 
#' @export md.simparams
md.simparams = function(){ new("md.simdata")}


#' md.sex
#' 
#' Creates information of a sex covariate that is a special case of Bernoulli covariate
#' specifying 1 for male and 2 for female sex. 
#' This function call must be added to the \code{\link{md.simparams}} object.
#' 
#' @param name name of the covariate
#' @param maleperc percentage of males (value = 1) versus females (value = 2)
#' @param asfactor specifies whether the resulting sex covariate is factorized using as.factor or not
#' @usage md.sex(name, maleperc = 0.5, asfactor = FALSE)
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'    md.sex("sex", 0.5)
#' }
#' 
#' @export md.sex
md.sex = function(name, maleperc = 0.5, asfactor = FALSE) { new("md.sex", name, maleperc, asfactor) }


#' md.uniform
#' 
#' Creates information of a uniformly distributed numeric or date covariate with the specified lower and upper limits.
#' This function call must be added to the \code{\link{md.simparams}} object.
#' 
#' @param name name of the covariate
#' @param min,max lower and upper limits of the distribution. Must be finite (either numeric or date)
#' @usage md.uniform(name, min = 0, max = 1)
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'    md.uniform("X", 0, 1)
#' }
#' 
#' @export md.uniform
md.uniform = function(name, min = 0, max = 1) { new("md.uniform", name, min, max) }


#' md.binom
#' 
#' Creates information of a Bernoulli distributed covariate with the specified probability.
#' This function call must be added to the \code{\link{md.simparams}} object.
#' 
#' @param name name of the covariate
#' @param prob probability of success (having a value of 1)
#' @usage md.binom(name, prob = 0.5)
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'    md.binom("X", 0.25)
#' }
#' 
#' @export md.binom
md.binom = function(name, prob = 0.5) { new("md.binom", name, prob) }


#' md.norm
#' 
#' Creates information of a normally distributed numeric covariate with the specified mean and standard deviation.
#' This function call must be added to the \code{\link{md.simparams}} object.
#' 
#' @param name name of the covariate
#' @param mean,sd mean and standard deviation
#' @usage md.norm(name, mean = 0, sd = 1)
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'    md.norm("X", 0, 1) 
#' }
#' 
#' @export md.norm
md.norm = function(name, mean = 0, sd = 1) { new("md.norm", name, mean, sd) }


#' md.mvnorm
#' 
#' Creates information of a vector of multi-normal covariates with the specified array of means and covariance matrix.
#' This function call must be added to the \code{\link{md.simparams}} object.
#' 
#' @param names vector of covariate names 
#' @param means vector of means, default is \code{rep(0, length(names))}
#' @param cov covariance matrix, default is \code{diag(ncol(names))}
#' @usage md.mvnorm(names, means = rep(0, length(names)), cov = diag(ncol(names)))
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'    md.mvnorm(c("X1", "X2"), c(100, 0), matrix(c(225, 3, 2, 1), 2, 2))
#' }
#' @export md.mvnorm
md.mvnorm = function(names, means = rep(0, length(names)), cov = diag(ncol(names))) { new("md.mvnorm", names, means, cov) }


#' md.sample
#' 
#' Creates information of a covariate that represents a random sample (with replacement) of the provided values.
#' This function call must be added to the \code{\link{md.simparams}} object.
#' 
#' @param name name of the covariate
#' @param array vector of elements from which to choose from
#' @param weights vector of probability weights for obtaining the elements of the vector being sampled or NULL if all values have equal probabilities
#' @usage md.sample(name, array, weights=NULL)
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'    md.sample("X", c(0, 1, 2), c(0.2, 0.3, 0.5)) 
#' }
#' @export md.sample
md.sample = function(name, array, weights=NULL) { new("md.sample", name, array, weights) }


#' md.constant
#' 
#' Creates information of a covariate that contains a fixed value (either numeric or date).
#' This function call must be added to the \code{\link{md.simparams}} object.
#' 
#' @param name name of the covariate
#' @param value value of the covariate
#' @usage md.constant(name, value)
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'    md.constant("start", as.Date("1990-1-1")) 
#' }
#' @export md.constant
md.constant = function(name, value) { new("md.sample", name, value, NULL) }



#' md.death
#' 
#' Creates information of a time of death variable distributed according to the specified population mortality table and demographic information.
#' This function call must be added to the \code{\link{md.simparams}} object. 
#'  
#' @param name name of the column
#' @param poptable population mortality table used to simulate times od death
#' @param sexcol name of the column (covariate) specifying birth date
#' @param birthcol name of the column (covariate) specifying birth date 
#' @param startcol name of the column (covariate) specifying the start date from which to calculate time of death
#' @usage md.death(name, poptable, sexcol, birthcol, startcol)
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'    md.sex("sex", 0.5) + 
#'      md.uniform("birth", as.Date("1930-1-1"), as.Date("1970-1-1")) +
#'        md.uniform("start", as.Date("2005-1-1"), as.Date("2010-1-1")) +
#'          md.death("death", survexp.us, "sex", "birth", "start") 
#' }
#' @export md.death
md.death = function(name, poptable, sexcol, birthcol, startcol) { new("md.death", name, poptable, sexcol, birthcol, startcol) }


#' md.data
#'
#' Creates information of covariates that are copies of covariates from an existing data set.
#' This function call must be added to the \code{\link{md.simparams}} object. 
#' 
#' @param data data.frame
#' @param randomsample controls whether the rows of the dataset are randomly sampled (with replaceent) or simply copied
#' @usage md.data(data, randomsample = FALSE)
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'    md.data(data) 
#' }
#' @export md.data
md.data = function(data, randomsample = FALSE) {new("md.data", data, randomsample) } 


#' md.eval
#' 
#' Creates information of a covariate that is calculated (from other covariates) by evaluating a specified formula.
#' This function call must be added to the \code{\link{md.simparams}} object. 
#' 
#' @param name name of the covariate
#' @param eval string specifying the formula to calculate the covariate
#' @param center expected value of the calculated covariate
#' @param invisible specifies whether the calculated covariate is included in the simulated dataset
#' @usage md.eval(name, eval, center = Inf, invisible = FALSE)
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'     md.uniform("birth", as.Date("1915-1-1"), as.Date("1930-1-1")) +
#'       md.uniform("start", as.Date("2000-1-1"), as.Date("2005-1-1")) +
#'           md.eval("age", "as.numeric(start - birth)/365.2425", 80, FALSE)
#' }
#' @export md.eval
md.eval = function(name, eval, center = Inf, invisible = FALSE) {new("md.eval", name, eval, center, invisible) } 


#' md.exp
#' 
#' Creates information of an event time variable distributed according to the specified exponential distribution.
#' This function call must be added to the \code{\link{md.simparams}} object. 
#' 
#' @param name name of the column
#' @param startcol column name specifying individual study start dates or a start date
#' @param covariates names of covariate columns used
#' @param betas betas for the corresponding covariate columns
#' @param lambda baseline lambda
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'   md.uniform("X1", 0.5) + 
#'     md.norm("X2") +
#'       md.exp("event", as.Date("1915-1-1"), c("X1", "X2"), c(0.1, 0.2), 0.05/365.2425)
#' }
#' 
#' @export md.exp
md.exp = function(name, startcol, covariates, betas, lambda) { new("md.exp", name, startcol, covariates, betas, lambda) }


#' Censoring simulated survival data
#' 
#' The md.censor function takes the data set simulated by the \code{\link{md.simulate}} and transforms it into 
#' a right censored sample by adding two columns to the original data set:\cr
#'  - time specifies the time to event or censoring, whichever comes first, specified in days and \cr
#'  - status specifies the censoring indicator and equals 0 if the individual is censored or <>0 in case of event. 
#' 
#' @param data data.frame containig event times and covariates
#' @param start column name specifying start dates (left censoring) 
#' @param end column name specifying end dates (right censoring)
#' @param eventcolumns vector of column names specifying a single event time or multiple event times (in case of competing risks)
#' @return data.frame containing original data and columns time, maxtime and status
#' @examples
#' 
#' \dontrun{
#' library(missDeaths)
#' 
#' sim = md.simparams() +
#'   md.sex("sex", 0.5) + 
#'     md.uniform("birth", as.Date("1915-1-1"), as.Date("1930-1-1")) +
#'       md.uniform("start", as.Date("2000-1-1"), as.Date("2005-1-1")) +
#'         md.death("death", survexp.us, "sex", "birth", "start") +
#'           md.eval("age", "as.numeric(start - birth)/365.2425", 80, FALSE) + 
#'             md.exp("event", "start", c("age", "sex"), c(0.1, 2), 0.05/365.2425)
#'             
#' data = md.simulate(sim, 1000)
#' complete = md.censor(data, "start", as.Date("2010-1-1"), c("event", "death"))
#' }
#' 
#' @export md.censor
md.censor = function(data, start, end, eventcolumns)
{
  dropcolumns = eventcolumns
  if (is.character(start))
  {
    dropcolumns = c(dropcolumns, start)
    start = data[,which(colnames(data)==start)]
  }
  if (is.character(end))
  {
    dropcolumns = c(dropcolumns, end)
    end = data[,which(colnames(data)==end)]  
  }
  
  maxtime = as.numeric(end-start)
  if (length(maxtime) == 1)
    maxtime = rep(maxtime, dim(data)[1])
  
  for (i in 1:length(eventcolumns))
    eventcolumns[i] = which(colnames(data)==eventcolumns[i])
  events = data.frame(data[,as.numeric(eventcolumns)])
  for (i in 1:length(eventcolumns))
    events[,i] = as.numeric(as.Date(events[,i]) - start)

  time = apply(events, 1, min)
  time = pmin(time, maxtime)
  status = as.numeric(time != maxtime)
  
  for (i in 1:length(status))
    if (status[i] == 1)
    {
      for (j in 1:dim(events)[2])
        if (time[i] == events[i, j])
        {
          status[i] = j
          break
        }
    }
  
    #data.frame(data[ , -which(names(data) %in% dropcolumns)], time, maxtime, status)
    data.frame(data, time, maxtime, status)
}

Try the missDeaths package in your browser

Any scripts or data that you put into this service are public.

missDeaths documentation built on Oct. 23, 2020, 6:39 p.m.