# Nothing
#--------Simulating data of size n------------------------
#' Simulate missing-covariate or missing-response data from input covariates
#'
#' @description Generates a binary response from a logistic model on the
#'   leading columns of \code{dataCov} and then introduces missing values.
#'   When \code{ymiss = FALSE}, missing values are placed in the second and
#'   third columns of \code{dataCov} using the logistic mechanisms given by
#'   \code{x2Mar} and \code{x3Mar}. When \code{ymiss = TRUE}, missing values
#'   are placed in the response using the logistic mechanism given by
#'   \code{truealpha}.
#' @param dataCov input covariate data (data frame or matrix). It needs at
#'   least \code{length(truebeta) - 1} columns, at least 3 columns when
#'   \code{ymiss = FALSE}, and at least \code{length(truealpha) - 2} columns
#'   when \code{ymiss = TRUE}.
#' @param truebeta coefficients (intercept first) used to generate the binary
#'   response from the first \code{length(truebeta) - 1} columns of
#'   \code{dataCov}, e.g. \code{logit(y=1)=x1+x2+x3}
#' @param truealpha coefficients (intercept, y, then covariates) used to
#'   generate missing responses based on the model
#'   \code{logit(R=1)=y+x1+x2+x3+x4+..}; only used when \code{ymiss = TRUE}
#' @param x2Mar coefficients (intercept, x1, y) used to generate missing
#'   values in the second covariate based on the model
#'   \code{logit(x2=missing)=x1+y}
#' @param nsim number of simulated datasets, default is 1
#' @param ymiss if \code{TRUE} missing values are generated in the response
#'   instead of the covariates, default is FALSE
#' @param x3Mar coefficients (intercept, x1, missing flag of x2, y) used to
#'   generate missing values in the third covariate, default is
#'   \code{c(1, -1, 1, -2)}
#' @return a list with two data frames, each stacking the \code{nsim}
#'   simulated datasets and labelled by a \code{nsim} column ("Sim1",
#'   "Sim2", ...): \code{dataOriginal}, the complete data, and
#'   \code{dataMissing}, the data with missing values. When
#'   \code{ymiss = TRUE}, \code{dataMissing} also contains the indicator
#'   \code{R} (1 = response set to missing).
#' @export
#'
#' @examples
#' demo_df <- simulateCovariateData(100, nCov=6)
#' simulated_df <- simulateMissDfYorX(demo_df, nsim=2)
#' testMissData <- simulated_df$dataMissing
#' head(testMissData)
#'
simulateMissDfYorX <- function(dataCov,
                               truebeta = c(1, -1, 1, 5),
                               truealpha = c(-1, 5, -1, -1, -1, 0.01),
                               x2Mar = c(1, -1, -1),
                               ymiss = FALSE,
                               nsim = 1,
                               x3Mar = c(1, -1, 1, -2)) {
  # ---- Input validation ---------------------------------------------------
  if (length(dim(dataCov)) != 2L) {
    stop("`dataCov` must be a data frame or matrix.", call. = FALSE)
  }
  if (!is.numeric(nsim) || length(nsim) != 1L || is.na(nsim) || nsim < 1) {
    stop("`nsim` must be a single number >= 1.", call. = FALSE)
  }
  n_beta_cov <- length(truebeta) - 1L
  if (n_beta_cov < 1L || n_beta_cov > ncol(dataCov)) {
    stop("`truebeta` needs an intercept plus between 1 and ncol(dataCov) ",
         "coefficients.", call. = FALSE)
  }
  # `ymiss == TRUE` in the original also accepted 1; keep that behaviour.
  miss_in_y <- isTRUE(as.logical(ymiss)[1L])
  if (miss_in_y) {
    if (length(truealpha) > ncol(dataCov) + 2L) {
      stop("`truealpha` has more coefficients than intercept + y + ",
           "ncol(dataCov).", call. = FALSE)
    }
  } else {
    if (ncol(dataCov) < 3L) {
      stop("`dataCov` needs at least 3 columns when `ymiss = FALSE`.",
           call. = FALSE)
    }
    if (length(x2Mar) != 3L || length(x3Mar) != 4L) {
      stop("`x2Mar` must have 3 and `x3Mar` 4 coefficients.", call. = FALSE)
    }
  }

  # ---- Small helpers ------------------------------------------------------
  inv_logit <- function(eta) 1 / (1 + exp(-eta))
  # One Bernoulli draw per probability, in order.
  draw_bernoulli <- function(p) stats::rbinom(length(p), size = 1, prob = p)

  # ---- Response model (identical across simulations) ----------------------
  design_y <- cbind(
    intercept = 1,
    as.matrix(dataCov[, seq_len(n_beta_cov), drop = FALSE])
  )
  prob_y <- inv_logit(design_y %*% truebeta)
  x1 <- design_y[, 2]  # first covariate, drives both covariate mechanisms

  # ---- One simulated dataset per iteration --------------------------------
  sims <- lapply(seq_len(as.integer(nsim)), function(i) {
    y <- draw_bernoulli(prob_y)
    original <- data.frame(y, dataCov)

    if (miss_in_y) {
      # Nonignorable mechanism: the missingness of y depends on y itself.
      design_r <- as.matrix(
        cbind(int = 1, original)[, seq_along(truealpha), drop = FALSE]
      )
      R <- draw_bernoulli(inv_logit(design_r %*% truealpha))
      y_obs <- y
      y_obs[R == 1] <- NA
      missing <- data.frame(y = y_obs, dataCov, R = R)
    } else {
      # Missing flag for column 2 depends on x1 and y.
      miss_x2 <- draw_bernoulli(inv_logit(cbind(1, x1, y) %*% x2Mar))
      # Missing flag for column 3 depends on x1, the column-2 flag and y.
      miss_x3 <- draw_bernoulli(
        inv_logit(cbind(1, x1, miss_x2, y) %*% x3Mar)
      )
      covs <- dataCov
      covs[miss_x2 == 1, 2] <- NA
      covs[miss_x3 == 1, 3] <- NA
      missing <- data.frame(y, covs)
    }

    sim_label <- paste0("Sim", i)
    original$nsim <- sim_label
    missing$nsim <- sim_label
    list(original = original, missing = missing)
  })

  # dataOriginal is the complete data; dataMissing is the data with missing
  # values in y (ymiss = TRUE) or in covariate columns 2 and 3 (otherwise).
  list(
    dataOriginal = do.call(rbind, lapply(sims, `[[`, "original")),
    dataMissing = do.call(rbind, lapply(sims, `[[`, "missing"))
  )
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.