R/simulateData.R

Defines functions simulateData

Documented in simulateData

#--------Simulating data of size n------------------------
#' Simulate binary-response data with missing values from covariate data
#'
#' @description Generates a binary response from a logistic model on the
#'   leading covariates and then creates missing values in the response and in
#'   two of the covariates. For every replicate:
#'   \itemize{
#'     \item \code{y} is drawn with
#'       \code{logit(P(y = 1)) = (1, x1, x2, ...) \%*\% truebeta};
#'     \item the response-missingness flag \code{R} is drawn with
#'       \code{logit(P(R = 1)) = (1, y, x1, x2, ...) \%*\% truealpha};
#'       \code{y} is set to \code{NA} wherever \code{R == 1};
#'     \item the second covariate is set to \code{NA} with probability
#'       \code{logit(p) = 1 - 5 * x1 + 0.1 * x4};
#'     \item the third covariate is set to \code{NA} with probability
#'       \code{logit(p) = -3 - x1 + x2 + y}.
#'   }
#'   The covariate-missingness models always use the complete (unmasked)
#'   covariate values.
#' @param dataCov input covariate data (data frame or matrix) with at least
#'   four columns; columns are used by position (first column is \code{x1},
#'   and so on).
#' @param truebeta coefficients, intercept first, used to generate the binary
#'   response; the first \code{length(truebeta) - 1} columns of \code{dataCov}
#'   enter the model.
#' @param truealpha coefficients used to generate the nonignorable missingness
#'   indicator \code{R}, ordered as intercept, \code{y}, then the first
#'   \code{length(truealpha) - 2} columns of \code{dataCov}.
#' @param nsim number of simulated data sets (at least 1), default is 2.
#'   Fractional values are truncated towards zero.
#'
#' @return A list with two elements:
#'   \describe{
#'     \item{dataOriginal}{the complete data (\code{y} followed by
#'       \code{dataCov}) of the \emph{last} replicate only;}
#'     \item{dataMissing}{all replicates stacked row-wise, with columns
#'       \code{y} (with \code{NA}s), the covariates (columns 2 and 3 with
#'       \code{NA}s), the flag \code{R} and the replicate label \code{nsim}
#'       (\code{"Sim1"}, \code{"Sim2"}, ...).}
#'   }
#' @export
#'
#' @examples
#' demo_df <- simulateCovariateData(100, nCov=6)
#' simulated_df <- simulateData(demo_df, nsim=2)
#' testMissData <- simulated_df$dataMissing
#' head(testMissData)
#'
#' @importFrom stats runif rbinom rpois rnorm
simulateData <- function(dataCov, truebeta = c(1, -1, 1, 5),
                         truealpha = c(-1, 5, -1, -1, -1, 0.01), nsim = 2) {
  #---------validating inputs------------------------------------------------
  if (length(dim(dataCov)) != 2L) {
    stop("`dataCov` must be a data frame or a matrix.", call. = FALSE)
  }
  n_obs <- nrow(dataCov)
  n_cov <- ncol(dataCov)
  if (n_cov < 4L) {
    # x1, x2 and x4 drive the covariate-missingness models; x2 and x3 are masked
    stop("`dataCov` must have at least 4 columns.", call. = FALSE)
  }
  if (!is.numeric(truebeta) || length(truebeta) < 1L ||
      length(truebeta) - 1L > n_cov) {
    stop("`truebeta` must be numeric with between 1 and ncol(dataCov) + 1 ",
         "elements.", call. = FALSE)
  }
  if (!is.numeric(truealpha) || length(truealpha) < 1L ||
      length(truealpha) - 2L > n_cov) {
    stop("`truealpha` must be numeric with between 1 and ncol(dataCov) + 2 ",
         "elements.", call. = FALSE)
  }
  if (!is.numeric(nsim) || length(nsim) != 1L || is.na(nsim) || nsim < 1) {
    # `1:nsim` would silently iterate over c(1, 0) for nsim = 0
    stop("`nsim` must be a single number >= 1.", call. = FALSE)
  }
  nsim <- as.integer(nsim)

  inv_logit <- function(eta) as.vector(1 / (1 + exp(-eta)))
  ones <- rep(1, n_obs)

  #---------quantities that do not change between replicates----------------
  # response model: intercept + leading covariates
  beta_design <- cbind(
    intercept = ones,
    as.matrix(dataCov[, seq_len(length(truebeta) - 1L), drop = FALSE])
  )
  prob_y <- inv_logit(beta_design %*% truebeta)

  # covariates entering the response-missingness model (after intercept and y)
  alpha_cov <- as.matrix(
    dataCov[, seq_len(max(length(truealpha) - 2L, 0L)), drop = FALSE]
  )

  # missingness in x2 depends on x1 and x4 only, so it is loop-invariant
  x1_x4 <- as.matrix(dataCov[, c(1L, 4L), drop = FALSE])
  prob_miss_x2 <- inv_logit(cbind(ones, x1_x4) %*% c(1, -5, 0.1))

  # missingness in x3 depends on x1, x2 and the replicate-specific y
  x1_x2 <- as.matrix(dataCov[, c(1L, 2L), drop = FALSE])

  #---------creating missing values-----------------------------------------
  replicates <- vector("list", nsim)

  for (i in seq_len(nsim)) {
    # Draw order (y, R, x2 flag, x3 flag) is kept fixed so that results for a
    # given seed are reproducible.
    y <- rbinom(n_obs, size = 1, prob = prob_y)
    originalData <- data.frame(y = y, dataCov)

    alpha_design <- cbind(ones, y, alpha_cov)[, seq_along(truealpha),
                                              drop = FALSE]
    prob_miss_y <- inv_logit(alpha_design %*% truealpha)
    prob_miss_x3 <- inv_logit(cbind(ones, x1_x2, y) %*% c(-3, -1, 1, 1))

    R <- rbinom(n_obs, size = 1, prob = prob_miss_y)
    miss_x2 <- rbinom(n_obs, size = 1, prob = prob_miss_x2)
    miss_x3 <- rbinom(n_obs, size = 1, prob = prob_miss_x3)

    masked_cov <- dataCov
    masked_cov[which(miss_x2 == 1L), 2L] <- NA
    masked_cov[which(miss_x3 == 1L), 3L] <- NA

    # R == 1 flags a missing response; assignment keeps y integer-typed even
    # when every value is missing
    yTemp <- y
    yTemp[R == 1L] <- NA

    # one replicate of the data with missing values
    tt_miss <- data.frame(yTemp = yTemp, masked_cov, R = R)
    tt_miss$nsim <- paste0("Sim", i)
    replicates[[i]] <- tt_miss
  }

  # stack once instead of growing a data frame inside the loop
  simulatedMissData <- do.call(rbind, replicates)
  colnames(simulatedMissData)[1] <- "y"

  #---dataOriginal is the complete data of the last replicate, dataMissing is
  #---the stacked data with missing y and missing values in covariates 2 and 3
  list(dataOriginal = originalData, dataMissing = simulatedMissData)
}

Try the glmfitmiss package in your browser

Any scripts or data that you put into this service are public.

glmfitmiss documentation built on June 8, 2025, 1:59 p.m.