Nothing
#--------Simulating data of size n------------------------
#' Simulate data with missing values from input covariate data
#'
#' @description Generates a binary response from a logistic model on the
#'   leading columns of \code{dataCov} and then introduces missing values in
#'   the response and in two of the covariates. For every simulation:
#'   \itemize{
#'     \item \code{y} is drawn as Bernoulli with
#'       \code{logit(P(y=1)) = b0 + b1*x1 + b2*x2 + ...}, using the first
#'       \code{length(truebeta) - 1} columns of \code{dataCov}.
#'     \item The missingness indicator \code{R} is drawn as Bernoulli with
#'       \code{logit(P(R=1)) = a0 + a1*y + a2*x1 + a3*x2 + ...}, using the
#'       first \code{length(truealpha) - 2} columns of \code{dataCov}. The
#'       response is set to \code{NA} where \code{R == 1}.
#'     \item The second column of \code{dataCov} is set to \code{NA} with
#'       probability \code{logit^-1(1 - 5*x1 + 0.1*x4)}.
#'     \item The third column of \code{dataCov} is set to \code{NA} with
#'       probability \code{logit^-1(-3 - x1 + x2 + y)}.
#'   }
#'   Here \code{x1}, \code{x2}, \code{x4} denote the first, second and fourth
#'   columns of \code{dataCov}.
#' @param dataCov input covariate data (data frame or matrix) with numeric
#'   columns. It needs at least \code{max(length(truebeta) - 1,
#'   length(truealpha) - 2)} columns.
#' @param truebeta coefficients (intercept first) used to generate the binary
#'   response, \code{logit(y=1)=x1+x2+x3}. Must have at least 2 elements.
#' @param truealpha coefficients (intercept, then \code{y}, then covariates)
#'   used to generate nonignorable missing values in the response, based on
#'   the model \code{logit(R=1)=y+x1+x2+x3+x4+..}. Must have at least 6
#'   elements because the covariate missingness models use the fourth
#'   covariate.
#' @param nsim number of simulated datasets (at least 1), default is 2
#'
#' @return A list with two elements:
#'   \describe{
#'     \item{dataOriginal}{the complete data (\code{y} followed by the columns
#'       of \code{dataCov}) of the \emph{last} simulation only.}
#'     \item{dataMissing}{the data with missing values for all simulations
#'       stacked row-wise, with columns \code{y}, the covariates, the
#'       indicator \code{R} and the simulation label \code{nsim}
#'       (\code{"Sim1"}, \code{"Sim2"}, ...).}
#'   }
#' @export
#'
#' @examples
#' demo_df <- simulateCovariateData(100, nCov=6)
#' simulated_df <- simulateData(demo_df, nsim=2)
#' testMissData <- simulated_df$dataMissing
#' head(testMissData)
#'
#' @importFrom stats runif rbinom rpois rnorm
simulateData <- function(dataCov, truebeta = c(1, -1, 1, 5),
                         truealpha = c(-1, 5, -1, -1, -1, 0.01), nsim = 2) {
  #---------validating the inputs-------------------------------------------
  if (length(dim(dataCov)) != 2L) {
    stop("'dataCov' must be a data frame or a matrix", call. = FALSE)
  }
  if (!is.numeric(nsim) || length(nsim) != 1L || is.na(nsim) || nsim < 1) {
    stop("'nsim' must be a single number >= 1", call. = FALSE)
  }
  n_sim <- as.integer(nsim)
  n_beta_cov <- length(truebeta) - 1L   # covariates in the model for y
  n_alpha_cov <- length(truealpha) - 2L # covariates in the model for R
  if (n_beta_cov < 1L) {
    stop("'truebeta' must hold an intercept and at least one slope",
         call. = FALSE)
  }
  if (n_alpha_cov < 4L) {
    # the missingness models below use the 1st, 2nd and 4th covariate
    stop("'truealpha' must have at least 6 elements", call. = FALSE)
  }
  if (ncol(dataCov) < max(n_beta_cov, n_alpha_cov)) {
    stop("'dataCov' has too few columns for 'truebeta'/'truealpha'",
         call. = FALSE)
  }

  n_obs <- nrow(dataCov)
  inv_logit <- function(eta) 1 / (1 + exp(-eta))

  #---------probability of y = 1 (identical for every simulation)------------
  # drop = FALSE keeps a one-column selection as a matrix
  design_y <- cbind(1, as.matrix(dataCov[, seq_len(n_beta_cov), drop = FALSE]))
  prob_y <- as.vector(inv_logit(design_y %*% truebeta))

  #---------covariates entering the missingness models-----------------------
  cov_alpha <- as.matrix(dataCov[, seq_len(n_alpha_cov), drop = FALSE])
  x1 <- cov_alpha[, 1L]
  x2 <- cov_alpha[, 2L]
  x4 <- cov_alpha[, 4L]

  # --prob of a missing value in the 2nd covariate; depends on x1 and x4
  # --only, so it is computed once outside the simulation loop
  prob_miss_x2 <- as.vector(inv_logit(cbind(1, x1, x4) %*% c(1, -5, 0.1)))

  #---------creating missing values------------------------------------------
  sims <- vector("list", n_sim)
  for (i in seq_len(n_sim)) {
    y <- rbinom(n_obs, size = 1, prob = prob_y)

    #----------Creating probabilities of creating R -------------------------
    prob_r <- as.vector(inv_logit(cbind(1, y, cov_alpha) %*% truealpha))
    # --prob of a missing value in the 3rd covariate; uses x1, x2 and y
    prob_miss_x3 <- as.vector(
      inv_logit(cbind(1, x1, x2, y) %*% c(-3, -1, 1, 1))
    )

    # draw order (R, flag for x2, flag for x3) is kept fixed so that results
    # are reproducible under set.seed()
    R <- rbinom(n_obs, size = 1, prob = prob_r)
    miss_x2 <- rbinom(n_obs, size = 1, prob = prob_miss_x2)
    miss_x3 <- rbinom(n_obs, size = 1, prob = prob_miss_x3)

    temp_df <- dataCov
    temp_df[which(miss_x2 == 1), 2L] <- NA
    temp_df[which(miss_x3 == 1), 3L] <- NA

    # response is unobserved wherever R == 1
    yTemp <- y
    yTemp[which(R == 1)] <- NA

    #creating a data with missing values
    tt.miss <- data.frame(yTemp, temp_df, R)
    tt.miss$nsim <- rep(paste0("Sim", i), n_obs)
    sims[[i]] <- tt.miss
  }

  # complete data of the last simulation (y from the final loop iteration)
  originalData <- data.frame(y, dataCov)

  # bind all simulations in one step instead of growing a data frame
  simulatedMissData <- do.call(rbind, sims)
  colnames(simulatedMissData)[1] <- "y"

  #---dataOriginal is the complete data of the last simulation, dataMissing
  #---is the stacked data with missing y and missing values in the 2nd and
  #---3rd covariate
  list(dataOriginal = originalData, dataMissing = simulatedMissData)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.