knitr::opts_chunk$set(echo = TRUE)
library(epiuf)
library(kableExtra)

Dataset : DummyData

This document shows how DummyData was built.

# Fix the random seed so that rebuilding the dataset yields the same values
# every time this document is run.
set.seed(2021)

# Share of vaccination dates that are blanked out to simulate missing data
prob_na <- 0.2

# Number of observations in the dataset
n <- 100
# Length of a year in days, used to size the date sampling windows
yearlen <- 365

# Generate ID variable: one sequential integer identifier per observation
ID <- seq_len(n)

# Generate the country variables from the RefSite reference table.
# The location names at positions 1, 6, 18 and 29 of the unique names are
# cycled through the observations; length.out guarantees exactly n values
# even when n is not a multiple of four.
site_names <- unique(RefSite$LocationName)[c(1, 6, 18, 29)]
Country <- rep(site_names, length.out = n)
# Look up each country's row in RefSite once and reuse it for both columns
site_row <- match(x = Country, table = RefSite$LocationName)
CountryCode <- RefSite$CountryISO2Code[site_row]
CountryID <- RefSite$ID[site_row]

# Age: the five values below repeated in order until n entries are filled
age_values <- c(45, 50, 55, 60, 65)
Age <- rep(age_values, length.out = n)

# COVID vaccine brand: one of three brands drawn with replacement
vaccine_brands <- c("Pfizer", "Moderna", "Astra Zeneca")
CovVaccBr <- sample(vaccine_brands, size = n, replace = TRUE)

# COVID case indicator: 0 with probability 0.75, 1 with probability 0.25
case_weights <- c(0.75, 0.25)
Case <- sample(c(0L, 1L), size = n, replace = TRUE, prob = case_weights)

# Date of infection: 2021-03-01 plus a random even number of days drawn from
# 0, 2, ..., 2 * yearlen.
# NOTE(review): only even offsets can be drawn here, whereas VaccDate samples
# from every day in 0:(yearlen * 2) - confirm the two-day spacing is intended.
DatePos <- as.Date("2021-03-01") + sample((0:yearlen) * 2, n, replace = TRUE)
# Observations that are not cases have no infection date
DatePos[Case == 0] <- NA

# Date of enrolment in the study: 2021-05-01 plus 0 to yearlen days
EnrolmentDate <- as.Date("2021-05-01") + sample(0:yearlen, n, replace = TRUE)

# Vaccination date: 2021-01-01 plus 0 to (2 * yearlen) days
VaccDate <- as.Date("2021-01-01") + sample(0:(yearlen * 2), n, replace = TRUE)
# Blank out a random share (prob_na) of the vaccination dates. Assigning NA
# through a logical index keeps the Date class, whereas ifelse() drops it and
# forces a conversion back from the underlying day count.
vacc_missing <- runif(n) < prob_na
VaccDate[vacc_missing] <- NA
# A missing vaccination date means the vaccine brand is missing as well
CovVaccBr[is.na(VaccDate)] <- NA

# Assemble the generated vectors into the final dataset; the three country
# columns and the vaccine brand are stored as factors.
DummyData <- data.frame(
  ID = ID,
  Country = as.factor(Country),
  CountryCode = as.factor(CountryCode),
  CountryID = as.factor(CountryID),
  Age = Age,
  EnrolmentDate = EnrolmentDate,
  CovVaccBr = as.factor(CovVaccBr),
  VaccDate = VaccDate,
  Case = Case,
  DatePos = DatePos
)

# Print the first few rows of the dataset
head(DummyData)
# Save DummyData with usethis, replacing any previously saved version.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
usethis::use_data(DummyData, overwrite = TRUE)


Epiconcept-Paris/STRAP-epiuf documentation built on Aug. 5, 2024, 3:41 a.m.