#' Simulate data
#' @export
#' @import dplyr
#' @import data.table
simulate_ep_data_no_dplyr <- function(
seed = 1,
n_patients = 350000,
age_min = 45,
age_max = 75,
n_exclusion_year = 7500,
admission_year_min = 2007,
admission_year_max = 2013
) {
set.seed(seed)
# 350 000 premiers cas de I26
simulated_data <- data.frame(
id_patient = seq_len(n_patients),
diagnoses = "PE",
procedures = as.character(''),
age = round(runif(n_patients, min = age_min, max = age_max)),
stringsAsFactors = FALSE
)
# tirage au sort de 33% de séjours I26 dupliqués (465000 séjours au total)
patients_to_duplicate <- sample(
x = seq_len(n_patients),
size = round(n_patients/3),
replace = TRUE
)
simulated_data <-
simulated_data[c(seq_len(n_patients), patients_to_duplicate),]
n_year <- admission_year_max - admission_year_min + 1
n_exclusion <- n_exclusion_year * n_year
# ajout de 50 000 critères d'exclusion annuels (probabilité de 7500/an)
## bind_rows(.,sample_n(mutate(., diagnoses = "EXCLUSION"),size = 50000, replace = TRUE)) %>%
patients_excluded <- sample(
x = seq_len(nrow(simulated_data)),
size = n_exclusion,
replace = TRUE
)
df_excluded <- simulated_data[patients_excluded, ]
df_excluded$diagnoses <- "EXCLUSION"
simulated_data <- rbind(simulated_data, df_excluded)
# Add date of admission
# Date as.Date("2013-12-12") - as.Date("2007-01-01") = 2537 jours
start_date <- as.Date(paste0(admission_year_min, "-01-01"))
n_days <- as.integer(as.Date(paste0(admission_year_max, "-12-12")) -
start_date)
simulated_data$admission <- round(runif(nrow(simulated_data), min = 1, max = n_days))
# Baseline risk of THR 1200/year = 8000 (+ 5/7)
##bind_rows(.,
## sample_n(., size = 11760, replace = FALSE) %>%
## mutate(admission = round(runif(dim(.)[1], min = 1, max = 2537))) %>%
## mutate(diagnoses = 'NODIAG', procedures = "THR"))
selected_patients_THR <- sample(
x = nrow(simulated_data),
size = 11760,
replace = FALSE
)
select_df <- simulated_data[selected_patients_THR, ]
select_df$admission <- round(runif(nrow(select_df), min = 1, max = n_days))
select_df$diagnoses <- "NODIAG"
select_df$procedures <- "THR"
simulated_data <- rbind(simulated_data, select_df)
# Risk of THR 3000 (+5/7) within 6 months before PE
## bind_rows(.,
## filter(., diagnoses %in% 'PE') %>%
## sample_n(size = 4200, replace = FALSE) %>%
## mutate(admission = admission - round(rexp(dim(.)[1])*40)) %>%
## mutate(diagnoses = 'NODIAG', procedures = "THR")
## )
sel_diag_PE <- simulated_data[simulated_data$diagnoses == "PE",]
sample_PE <- sample(x = seq_len(nrow(sel_diag_PE)), size = 4200, replace = FALSE)
df_PE <- sel_diag_PE[sample_PE, ]
df_PE$admission <- df_PE$admission - round(rexp(nrow(df_PE))*40)
df_PE$diagnoses <- "NODIAG"
df_PE$procedures <- "THR"
simulated_data <- rbind(simulated_data, df_PE)
##filter(admission > 0) %>%
simulated_data <- simulated_data[simulated_data$admission > 0, ]
##mutate(year = as.Date(admission, origin = "2007-01-01") %>% format("%Y") %>% as.numeric) %>%
simulated_data$year <- as.integer(format(
x = as.Date(simulated_data$admission, origin = start_date),
"%Y"
))
## mutate(length_of_stay = 1) %>%
## mutate(id_stay = 1:dim(.)[1]) %>%
simulated_data$length_of_stay <- 1
simulated_data$id_stay <- seq_len(nrow(simulated_data))
simulated_data$diagnoses <- as.factor(simulated_data$diagnoses)
simulated_data$procedures <- as.factor(simulated_data$procedures)
return(as.data.table(simulated_data))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.