Nothing
#' @title Generation of Artificial Data
#'
#' @description The function generates a set of artificial data, including covariates generated by uniform
#' distribution with an interval \code{[0.5, 0.5]}, survival time and censoring status with measurement error and misclassifications.
#' In this function, users can specify different degrees of measurement
#' error that links observed survival time with true survival time, and links observed
#' censoring status with true censoring status. Moreover, the accelerated functional failure time model considered in
#' function is given by \code{T=f(X1)+f(X2)+f(X3)+f(X4)+error}, where \code{T} is log failure time and \code{f(X1)=4*x1^2+x1},
#' \code{f(X2)=sin(6*x2)},\code{f(X3)=cos(6*x3)-1} and \code{f(X4)=4*x4^3+x4^2}.
#'
#'
#' @param n Sample size.
#' @param p The number of covariates.
#' @param pi_01 Misclassifcation probability is P(Observed Censoring Status = 0 | Actual Censoring Status = 1).
#' @param pi_10 Misclassifcation probability is P(Observed Censoring Status = 1 | Actual
#' Censoring Status = 0).
#' @param gamma0 A scalar that links the observed survival time and true survival time in
#' the classical additive measurement error model \code{y*=y+gamma0+gamma1*X+v}, where y* is observed survival time and \code{y} is true survival time, and \code{x} is covariates and v is noise term.
#' @param gamma1 A \code{p}-dimensional vector of parameters in the
#' additive measurement error model \code{y*=y+gamma0+gamma1*X+v}, where \code{y*}
#' is observed survival time and \code{y} is true survival time, \code{x} is covariates and \code{v} is
#' noise term.
#' @param e_var The variance of noise term \code{v} in the additive measurement
#' error model \code{y*=y+gamma0+gamma1*X+v}, where \code{v} is assumed to follow a normal
#' distribution.
#'
#' @return generated_data \code{c(n,p+2)} dimensional data frame. The first column is observed survival time and
#' second column is observed censoring status, and the other columns are covariates.
#'
#' @examples
#' ## Set the relationship between observed survival time
#' ## and true survival time equals y*= y+1+X1+v, where the variance is
#' ## 0.75 with n=500 and p=50 and misclassification probability=0.9.
#'
#' a <- matrix(0,ncol=50, nrow = 1); a[1,1] <- 1
#' data <- data_gen(n=500, p=50, pi_01=0.9, pi_10 = 0.9, gamma0=1,
#' gamma1=a, e_var=0.75)
#' @export
#' @importFrom stats "rnorm" "runif"
#'
#'
data_gen <- function(n,p,pi_01,pi_10,gamma0,gamma1,e_var){
f1 <- function(x1){
y <- 4*x1^2+x1
return(y)
}
f2 <- function(x2){
y <- sin(6*x2)
return(y)
}
f3 <- function(x3){
y <- cos(6*x3)-1
return(y)
}
f4 <- function(x4){
y <- 4*x4^3+x4^2
return(y)
}
t<- c()
e <- c()
d <- c()
covariates <- c()
dim = p
n = n
for (i in c(1:n)){
p <- runif(dim,-0.5,0.5)
error <- rnorm(1,0,1)
a <- f1(p[1])+f2(p[2])+f3(p[3])+f4(p[4])+error
t[i] <- a
e[i] <- error
d <- t(data.frame(p))
covariates <- rbind(covariates,d)
}
censoring_probability_n <- c()
coefficients <-matrix(runif(dim,-5,5),nrow = 1)
for(i in c(1:dim(covariates)[1])){
variables <- t(matrix(covariates[i,],nrow = 1))
df4 <- coefficients%*%variables
censoring_probability <- (exp(df4))/(1+exp(df4))
censoring_probability_n <- c(censoring_probability_n,censoring_probability)
}
censoring_indicator <- (censoring_probability_n >=0.5)*1
y <- c()
for (i in c(1:length(censoring_indicator))){
if (censoring_indicator[i]==1){
y <- c(y,t[i])
}else{
y <- c(y,t[i]-exp(0.003))
}
}
df_for_y_with_measurement_error <- c()
v <- data.frame(rnorm(length(censoring_indicator),0,e_var))
constant <- data.frame(rep(gamma0,length(censoring_indicator)))
b_cov <- c()
for (i in 1:dim(covariates)[1]){
b_cov[i] <- gamma1 %*% covariates[i,]
}
df_for_y_with_measurement_error <- cbind(y,constant,v,b_cov)
colnames(df_for_y_with_measurement_error)[2:3] <-c('constant','v')
y_with_measurement_error <- data.frame(apply(df_for_y_with_measurement_error,1,sum))
misclassification_proportion <- matrix(c(1-pi_01,pi_01,pi_10,1-pi_10), ncol = 2)
censoring_probability_indicator_1 <- censoring_probability_n
censoring_probability_indicator_0 <- 1 - censoring_probability_n
df5 <- cbind(censoring_probability_indicator_1,censoring_probability_indicator_0)
censoring_indicator_with_measurement_error <- NULL
for (i in c(1:length(censoring_indicator))){
df6 <- misclassification_proportion %*%df5[i,]
if (df6[1,]>=0.5){
censoring_indicator_with_measurement_error <- c(censoring_indicator_with_measurement_error,1)
}else{
censoring_indicator_with_measurement_error <- c(censoring_indicator_with_measurement_error,0)
}
}
generated_data <- cbind(y_with_measurement_error, censoring_indicator_with_measurement_error, covariates)
colnames(generated_data)[1:2] <- c('failure time', 'censoring indicator')
return(generated_data)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.