##### Fake Data Creation #####
# Endoscopies() creates a spreadsheet of Endoscopy data,Histop_df() creates a dataframe of
# Histopathology data. Endomerge() associates them together
# EndoRaw() creates an Endoscopy report (so the data has not already been extracted) for upper GI
# ColonEndoRaw() creates an Endoscopy report (so the data has not already been extracted) for lower GI
# pathRep() creates an Pathology report (so the data has not already been extracted) for upper GI
# ColonpathRep() creates an Pathology report (so the data has not already been extracted) for upper GI
#' FakeEndoCreator
#'
#' Creates fake endoscopic data to play with as a spreadsheet format. It assumes
#' that some of the data has already been separated out. EndoRaw() for upper GI
#' and ColonEndoRaw() for lower GI are better functions for the real
#' scenario of just getting the report as a series of unextracted text files.
#' (Their histology equivalents are pathRep and ColonpathRep respectively)
#' @param x None needed
#' @keywords Fake endoscopy
#' @export
#' @examples
#' Endoscopies()
Endoscopies <- function(x) {
Endoscopist <- list(
x1 = "Dr Jonny Begood", x2 = "Dr Elvis Presley",
x3 = "Dr Bilbo Baggins", x4 = "Dr Elmo Fudd",
x5 = "Dr Jimminey Cricket", x6 = "Dr Davy Jones",
x7 = "Dr Bugs Bunny", x8 = "Dr Rara Rasputin",
x9 = "Dr Chubby Checker", x10 = "Dr Frank Sinatra",
x11 = "Dr Charles Dickens", x12 = "Dr Joseph Conrad",
x13 = "Dr Florence Nightingale", x14 = "Dr Sal Addin",
x15 = "Dr King Richard III"
)
Midazolam <- list(
x = "1mg", x = "2mg", x = "3mg",
x = "4mg", x = "5mg", x = "6mg", x = "7mg",
x = "8mg"
)
Fentanyl <- list(
x = "12.5mcg", x = "25mcg", x = "50mcg",
x = "75mcg", x = "100mcg", x = "125mcg", x = "150mcg"
)
Indication <- list(
x1 = "Therapeutic- Dilatation",
x2 = "Other-", x3 = "Follow-up ULCER HEALING",
x4 = "Haematemesis or Melaena/Blood PR", x5 = "Previous OGD ? 8 months ago",
x6 = "Dysphagia/Odynophagia", x7 = "Surveillance-Barrett's",
x8 = "Nausea and/or Vomiting", x9 = "Weight Loss",
x10 = "Dysphagia/intermittent for a few months",
x11 = "Other-", x12 = "Small Bowel Biopsy",
x13 = "Dyspepsia", x14 = "Reflux-like Symptoms/Atypical Chest Pain",
x15 = "chronic abdo pain and constipaton",
x16 = "Oesophagus- Dysplasia", x17 = "Therapeutic- RFA"
)
Diagnosis <- list(
x1 = "Ulcer- Oesophageal. ",
x2 = "Post chemo-radiotherapy stricture ",
x3 = "Possible achalasia.", x4 = "Oesophagitis. ",
x5 = "Food bolus obstructing the oesophagus.",
x6 = "Hiatus Hernia. ", x7 = "Extensive neoplastic looking esophageal lesion. ",
x8 = "Esophageal candidiasis ", x9 = "Barretts oesophagus. ",
x10 = "Gastritis"
)
Endodat <- sample(seq(as.Date("2013/01/01"), as.Date("2017/05/01"),
by = "day"
), 1000)
EndoHospNum <- sample(c(
"P433224", "P633443", "K522332",
"G244224", "S553322", "D0739033", "U873352",
"P223333", "Y763634", "I927282", "P223311",
"P029834", "U22415", "U234252", "S141141",
"O349253", "T622722", "J322909", "F630230",
"T432452"
), 1000, replace = TRUE)
# Yes I know... This was just easier..
BarrettsLength <- c(
"C0M1", "C0M2", "C0M3", "C0M4",
"C0M5", "C0M6", "C0M7", "C0M8", "C0M9", "C0M10",
"C1M2", "C1M3", "C1M4", "C1M5", "C1M6", "C1M7",
"C1M8", "C1M9", "C1M10", "C2M3", "C2M4", "C2M5",
"C2M6", "C2M7", "C2M8", "C2M9", "C2M10", "C3M4",
"C3M5", "C3M6", "C3M7", "C3M8", "C3M9", "C3M10",
"C4M5", "C4M6", "C4M7", "C4M8", "C4M9", "C4M10",
"C5M6", "C5M7", "C5M8", "C5M9", "C5M10", "C6M7",
"C6M8", "C6M9"
)
# Merge them all together into a dataframe
Endoscopies <- data.frame(EndoHospNum, replicate(
1000,
paste(
"Date of Procedure", sample(Endodat,
1,
replace = F
), " Endoscopist: ", sample(Endoscopist,
1,
replace = F
), "Midazolam: ", sample(Midazolam,
1,
replace = F
), "Fentanyl: ", sample(Fentanyl,
1,
replace = F
), "Indication:", sample(Indication,
1,
replace = F
), "Diagnosis:", stringr::str_c(sample(Diagnosis,
sample(1:10, 1),
replace = F
), collapse = "."),
sample(c("", paste(
"Barrett's oesophagus length:",
sample(BarrettsLength, 1)
)), 1)
)
))
# Lets rename the one column to something more
# intelligent
names(Endoscopies) <- c("HospNum_Id", "EndoReports")
######### Data accordionisation Convert into paragraphs so
######### can be more easily separated
Endoscopies$Date <- stringr::str_extract(
Endoscopies$EndoReports,
"Date of Procedure.*Endoscopist"
)
Endoscopies$Endoscopist <- stringr::str_extract(
Endoscopies$EndoReports,
"Endoscopist:.*Midazolam"
)
Endoscopies$Midazolam <- stringr::str_extract(
Endoscopies$EndoReports,
"Midazolam:.*Fentanyl"
)
Endoscopies$Fentanyl <- stringr::str_extract(
Endoscopies$EndoReports,
"Fentanyl:.*Indication"
)
Endoscopies$Indication <- stringr::str_extract(
Endoscopies$EndoReports,
"Indication:.*Diagnosis"
)
Endoscopies$Diagnosis <- stringr::str_extract(
Endoscopies$EndoReports,
"Diagnosis:.*"
)
Endoscopies$BarrC <- stringr::str_extract(
Endoscopies$EndoReports,
" oesophagus length: C.*M.*"
)
Endoscopies$BarrM <- stringr::str_extract(
Endoscopies$BarrC,
"M.*"
)
######### Data cleaning Endoscopy dataset and formatting
######### the columns
Endoscopies$Date <- gsub(
"Date of Procedure", "",
Endoscopies$Date
)
# Note we are using the date conversion function
# here
Endoscopies$Date <- as.Date(gsub(
" Endoscopist",
"", Endoscopies$Date
), format = "%Y-%m-%d")
Endoscopies$Endoscopist <- gsub(
"Endoscopist: Dr ",
"", Endoscopies$Endoscopist
)
Endoscopies$Endoscopist <- gsub(
"Midazolam", "",
Endoscopies$Endoscopist
)
Endoscopies$Midazolam <- gsub(
"Midazolam: ", "",
Endoscopies$Midazolam
)
# Also reformatting this column into a nueric
# column at the same time
Endoscopies$Midazolam <- as.numeric(gsub(
"mg Fentanyl",
"", Endoscopies$Midazolam
))
Endoscopies$Fentanyl <- gsub(
"Fentanyl: ", "",
Endoscopies$Fentanyl
)
# Also reformatting this column into a nueric
# column at the same time
Endoscopies$Fentanyl <- as.numeric(gsub(
"mcg Indication",
"", Endoscopies$Fentanyl
))
Endoscopies$Indication <- gsub(
"Indication: ",
"", Endoscopies$Indication
)
Endoscopies$Indication <- gsub(
" Diagnosis", "",
Endoscopies$Indication
)
Endoscopies$Diagnosis <- gsub(
"Indication: ", "",
Endoscopies$Diagnosis
)
Endoscopies$Diagnosis <- gsub(
" Diagnosis", "",
Endoscopies$Diagnosis
)
Endoscopies$BarrC <- gsub(
"oesophagus length: ",
"", Endoscopies$BarrC
)
# Also reformatting this column into a nueric
# column at the same time
Endoscopies$BarrC <- gsub("M.*", "", Endoscopies$BarrC)
Endoscopies$BarrC <- as.numeric(gsub("C", "", Endoscopies$BarrC))
# Also reformatting this column into a nueric
# column at the same time
Endoscopies$BarrM <- as.numeric(gsub("M", "", Endoscopies$BarrM))
# load(file = "Endoscopies.rda")
return(Endoscopies)
}
#' FakeHistolCreator
#'
#' Creates fake histology data to play with
#' @param x None needed
#' @keywords Fake histology spreadshet data
#' @export
#' @examples
#' Histop_df()
Histop_df <- function(x) {
# Generate a load of strings
line <- list(
x1 = "Intestinal metaplasia is present.",
x2 = "Basal hyperplasia is prominent", x3 = "There is no dysplasia or malignancy.",
x4 = "No Helicobacter are seen.", x5 = "There is some ulceration.",
x6 = "There is no intercellular oedema in the surface epithelium.",
x7 = " PAS staining shows occasional spores, consistent with candida.",
x8 = " No herpetic viral inclusions are seen.",
x9 = " There is no dysplasia and no invasive carcinoma.",
x10 = " There is mild regenerative epithelial change, but neither dysplasia nor malignancy is seen.",
x11 = "The appearances are consistent with the endoscopic diagnosis of Barrett's oesophagus with active chronic inflammation.",
x12 = "The biopsies of oesophageal squamous mucosa show surface erosion and active chronic inflammation.",
x13 = "Numerous Candida spores and hyphae are present admixed with ulcer slough.",
x14 = "There is reactive basal cell hyperplasia and mild inflammatory epithelial atypia.",
x15 = "There is no significant increase in intraepithelial eosinophils.",
x16 = "No granulomas or viral inclusions are seen.",
x17 = "The appearances are those of Candida oesophagitis.",
x18 = "Neither dysplasia nor malignancy is seen.",
x19 = "The appearances are consistent with, but not specific for Barrett's (columnar lined) oesophagus.",
x20 = "High grade dysplasia is present throughout this sample",
x21 = "There is low grade dysplasia", x22 = "This is a dysplastic sample"
)
list.of.samples <- replicate(1000, paste(
"Macrosopic description:",
sample(1:10, 1), "specimens collected the largest measuring",
sample(1:5, 1), "x", sample(1:5, 1), "x", sample(
1:5,
1
), "mm and the smallest", sample(
1:5,
1
), "x", sample(1:5, 1), "x", sample(
1:5,
1
), "mm"
), simplify = FALSE)
# Merge the strings together randomly
histop <- replicate(1000, paste(sample(list.of.samples,
1,
replace = F
), paste("Diagnoses", stringr::str_c(sample(line,
sample(3:10, 1),
replace = F
), collapse = "."))))
# Because we eventually will merge histopath and
# endoscopy together we are going to be crafty and
# generate the histopath dates from the endoscopy
# dates with 0-2 days difference
dat <- Endoscopies$Date + sample(0:2, 1)
dat <- sample(seq(as.Date("2013/01/01"), as.Date("2017/05/01"),
by = "day"
), 1000)
# Generate hospital numbers from the Endoscopies
# report
HospNum_Id <- Endoscopies$HospNum_Id
Histop_df <- data.frame(HospNum_Id, dat, paste(
"Date received:",
dat, histop
))
names(Histop_df) <- c("HospNum_Id", "dat", "HistoReport")
######### Data accordionisation Convert into paragraphs so
######### can be more easily separated
Histop_df$Date <- stringr::str_extract(
Histop_df$HistoReport,
"Date received:.*Macrosopic description:"
)
Histop_df$Macro <- stringr::str_extract(
Histop_df$HistoReport,
"Macrosopic description:.*Diagnoses"
)
Histop_df$Diagnoses <- stringr::str_extract(
Histop_df$HistoReport,
"Diagnoses.*"
)
######### Data cleaning Histopathology dataset and
######### formatting the columns
Histop_df$Date <- gsub("Date received: ", "", Histop_df$Date)
Histop_df$Date <- as.Date(gsub(
"Macrosopic description:",
"", Histop_df$Date
), format = "%Y-%m-%d")
Histop_df$Macro <- gsub(
"Macrosopic description: ",
"", Histop_df$Macro
)
Histop_df$Macro <- gsub("Diagnoses", "", Histop_df$Macro)
Histop_df$Diagnoses <- gsub("Diagnoses", "", Histop_df$Diagnoses)
# Lets get rid of a column we don't need
Histop_df$dat <- NULL
# load(file = "Histop_df.rda")
return(Histop_df)
}
######### Data merging We can merge straight away as we
######### have the same names for the columns date and
######### HospNum_Id so no need to mess around. We will use
######### the fuzzyjoin method as there is sometimes a gap
######### between the endoscopy date and the date that the
######### histopathology was received:
samplenumber <- 2000
HospitalNumberID <- paste("Hospital Number: ", sample(c(LETTERS)),
sample(1e+06:9999999, (samplenumber - 1900), replace = T),
sep = ""
)
NHS_Trust <- replicate(samplenumber, c("Hospital: Random NHS Foundation Trust"))
Patient_Name <- paste("Patient Name: ", randomNames::randomNames(
samplenumber,
"first", "last"
))
Date_of_Birth <- paste("DOB: ", generator::r_date_of_births(samplenumber,
start = as.Date("1900-01-01"), end = as.Date("1999-01-01")
))
GeneralPractictioner <- paste("General Practitioner: Dr. ",
randomNames::randomNames(samplenumber, "first", "last"),
sep = ""
)
Date_of_ProcedureAll <- generator::r_date_of_births(samplenumber,
start = as.Date("2001-01-01"), end = as.Date("2017-01-01")
)
#' EndoRaw
#'
#' Generates fake Endoscopy date
#' @param x None needed
#' @keywords fake endoscopy data
#' @import randomNames
#' @import generator
#' @export
#' @examples
#' EndoRaw(x)
EndoRaw2 <- function() {
Date_of_Procedure <- Date_of_ProcedureAll
Date <- paste("Date of procedure: ", Date_of_Procedure)
EndoscopistList <- as.list(sample(randomNames::randomNames(
samplenumber,
"first", "last"
), 10, replace = T))
Second_EndoscopistList <- as.list(sample(randomNames::randomNames(
samplenumber,
"first", "last"
), 10, replace = T))
Endoscopist <- replicate(samplenumber, paste("Endoscopist: Dr. ",
sample(EndoscopistList, 1, replace = F),
sep = ""
))
Second_Endoscopist <- replicate(samplenumber, paste("2nd Endoscopist: Dr. ",
sample(Second_EndoscopistList, 1, replace = F),
sep = ""
))
MedicationsFent <- replicate(samplenumber, paste(
"Medications: Fentanyl ",
sample(list(
x = "12.5mcg", x = "25mcg", x = "50mcg",
x = "75mcg", x = "100mcg", x = "125mcg",
x = "150mcg"
), 1, replace = F)
))
MedicationsMidaz <- replicate(samplenumber, paste(
"Midazolam ",
sample(list(
x = "1mg", x = "2mg", x = "3mg",
x = "4mg", x = "5mg", x = "6mg", x = "7mg"
),
1,
replace = F
)
))
Instrument <- replicate(samplenumber, paste(
"Instrument: ",
sample(list(
x = "FG1", x = "FG2", x = "FG3",
x = "FG4", x = "FG5", x = "FG6", x = "FG7"
),
1,
replace = F
)
))
Extent_of_Exam <- replicate(samplenumber, paste(
"Extent of Exam: ",
sample(list(
x = "Failed intubation", x = "Oesophagus",
x = "Stomach body", x = "D1", x = "D2",
x = "Pylorus", x = "GOJ"
), 1, replace = F)
))
# Import the Findings text from data folder - but
# how to get it there?
INDICATIONS_FOR_EXAMINATION <- replicate(
samplenumber,
paste("Indications:", sample(list(
x1 = "Therapeutic- Dilatation",
x2 = "Other-", x3 = "Follow-up ULCER HEALING",
x4 = "Haematemesis or Melaena/Blood PR",
x5 = "Previous OGD ? 8 months ago", x6 = "Dysphagia/Odynophagia",
x7 = "Surveillance-Barrett's", x8 = "Nausea and/or Vomiting",
x9 = "Weight Loss", x10 = "Dysphagia/intermittent for a few months",
x11 = "Other-", x12 = "Small Bowel Biopsy",
x13 = "Dyspepsia", x14 = "Reflux-like Symptoms/Atypical Chest Pain",
x15 = "chronic abdo pain and constipaton",
x16 = "Oesophagus- Dysplasia", x17 = "Therapeutic- RFA"
),
1,
replace = F
))
)
PROCEDURE_PERFORMED <- "Procedure Performed: Gastroscopy (OGD)"
FINDINGS <- read.table("/home/rstudio/EndoMineR/data-raw/data/FindingsText",
header = T, stringsAsFactors = F
)
FINDINGS <- replicate(samplenumber, paste(
"Findings: ",
stringr::str_c(as.list(sample(FINDINGS$x, sample(1:10),
replace = T
)), collapse = ",")
))
TherapyorNot <- replicate(
samplenumber,
paste(sample(list(
x1 = "Therapeutic- Dilatation was performed",
x2 = "", x3 = "HALO 90 done with good effect",
x4 = "TTS HALO to area",
x5 = "", x6 = "",
x7 = "A lesion underwent EMR", x8 = "",
x9 = "", x10 = "",
x11 = "", x12 = "Area APC'd",
x13 = "", x14 = "",
x15 = "",
x16 = "", x17 = "Therapeutic- RFA", x18 = "",
x19 = "", x20 = "", x21 = "", x22 = "", x23 = "", x24 = "", x25 = ""
),
1,
replace = F
))
)
ENDOSCOPIC_DIAGNOSIS <- data.frame(c(
"Ulcer- Oesophageal. ",
"Post chemo-radiotherapy stricture ", "Possible achalasia.",
"Oesophagitis. ", "Food bolus obstructing the oesophagus.",
"Hiatus Hernia. ", "Extensive neoplastic looking esophageal lesion. ",
"Esophageal candidiasis ", "Barretts oesophagus. ",
"Gastritis"
), stringsAsFactors = F)
names(ENDOSCOPIC_DIAGNOSIS) <- "x"
ENDOSCOPIC_DIAGNOSIS <- replicate(
samplenumber,
paste("Endoscopic Diagnosis: ", stringr::str_c(as.list(sample(ENDOSCOPIC_DIAGNOSIS$x,
sample(1:3),
replace = F
)), collapse = ","))
)
# Now put it all together in one long text to
# simulate a real Endoscopic report
TheOGDReport <- data.frame(
NHS_Trust, HospitalNumberID,
Patient_Name, GeneralPractictioner, Date, Endoscopist,
Second_Endoscopist, MedicationsFent, MedicationsMidaz,
Instrument, Extent_of_Exam, INDICATIONS_FOR_EXAMINATION,
PROCEDURE_PERFORMED, FINDINGS, TherapyorNot, ENDOSCOPIC_DIAGNOSIS
)
# Now paste the OGD report dataframe together to
# make the fake report:
TheOGDReportFinal <- tidyr::unite(TheOGDReport,
cat(paste(colnames(TheOGDReport), collapse = "\n")),
colnames(TheOGDReport),
sep = "\n"
)
names(TheOGDReportFinal) <- "OGDReportWhole"
save(TheOGDReportFinal, file = "/home/rstudio/EndoMineR/data/TheOGDReportFinal.rda")
# return(TheOGDReportFinal)
Myendo <- TheOGDReportFinal
Myendo$OGDReportWhole <- gsub("2nd Endoscopist:", "Second endoscopist:", Myendo$OGDReportWhole)
EndoscTree <- list(
"Hospital Number:", "Patient Name:", "General Practitioner:",
"Date of procedure:", "Endoscopist:", "Second endoscopist:", "Medications",
"Instrument", "Extent of Exam:", "Indications:", "Procedure Performed:", "Findings:",
"Endoscopic Diagnosis:"
)
for (i in 1:(length(EndoscTree) - 1)) {
Myendo <- Extractor(
Myendo, "OGDReportWhole", as.character(EndoscTree[i]),
as.character(EndoscTree[i + 1]), as.character(EndoscTree[i])
)
}
Myendo$Dateofprocedure <- as.Date(Myendo$Dateofprocedure)
save(Myendo, file = "/home/rstudio/EndoMineR/data/Myendo.rda")
}
#' pathRep
#'
#' Creates raw Pathology reports
#' @param x None needed
#' @import stringr
#' @import generator
#' @keywords Pathology reports
#' @export
#' @examples
#' pathRep(x)
pathRep2 <- function() {
AccessionNum <- paste0("SP-", sample(10:99), "-", sample(1000000:9999999, 2000, replace = F))
Date <- Date_of_ProcedureAll + sample(1:12, 1)
Date <- paste("Date received: ", Date)
# replicate(samplenumber, as.numeric(sample(1:10)),
# 1))
# Clinical Details
ClinDet <- read.table("./data-raw/data/HistolClinDetText",
header = T, stringsAsFactors = F
)
ClinDet <- replicate(samplenumber, paste(
"Clinical Details: ",
stringr::str_c(as.list(sample(ClinDet$x, sample(1:10),
replace = T
)), collapse = ",")
))
# Nature of the specimen
NatureOfSpec <- read.table("./data-raw/data/HistolMacDescription.txt",
header = T, stringsAsFactors = F
)
NatureOfSpec <- replicate(samplenumber, paste(
sample(1:10, 1, replace = T), "specimen. Nature of specimen: ",
stringr::str_c(as.list(sample(NatureOfSpec$x, sample(1:10), replace = T)), collapse = ",")
))
MacDescrip <- unlist(replicate(samplenumber, paste(
"Macroscopic description:",
sample(1:10, 1), "specimens collected the largest measuring",
sample(1:5, 1), "x", sample(1:5, 1), "x", sample(1:5, 1), "mm and the smallest", sample(1:5, 1), "x", sample(1:5, 1), "x", sample(1:5, 1), "mm"
), simplify = FALSE))
# Merge the strings together randomly
# Histol Details
Histol <- read.table("./data-raw/data/HistolText", header = T, stringsAsFactors = F)
Histol <- replicate(samplenumber, paste(
"Histology: ",
stringr::str_c(as.list(sample(Histol$x, sample(1:10),
replace = T
)), collapse = ",")
))
Diagnostic <- read.table("./data-raw/data/HistolDxText",
header = T, stringsAsFactors = F
)
Diagnostic <- replicate(samplenumber, paste(
"Diagnosis: ",
stringr::str_c(as.list(sample(Diagnostic$x, sample(5:10),
replace = T
)), collapse = ",")
))
PathDataFrameReport <- data.frame(
AccessionNum, NHS_Trust, HospitalNumberID,
Patient_Name, Date_of_Birth, GeneralPractictioner,
Date, ClinDet, NatureOfSpec, MacDescrip, Histol,
Diagnostic
)
PathDataFrameFinal <- tidyr::unite(PathDataFrameReport,
cat(paste(colnames(PathDataFrameReport), collapse = "\n")),
colnames(PathDataFrameReport),
sep = "\n"
)
names(PathDataFrameFinal) <- "PathReportWhole"
save(PathDataFrameFinal, file = "/home/rstudio/EndoMineR/data/PathDataFrameFinal.rda")
#################
Mypath <- PathDataFrameFinal
HistolTree <- list(
"Hospital Number", "Patient Name", "DOB:",
"General Practitioner:",
"Date received:", "Clinical Details:", "Macroscopic description:", "Histology:",
"Diagnosis:", ""
)
for (i in 1:(length(HistolTree) - 1)) {
Mypath <- Extractor(
Mypath, "PathReportWhole", as.character(HistolTree[i]),
as.character(HistolTree[i + 1]), as.character(HistolTree[i])
)
}
colnames(Mypath)[which(names(Mypath) == "Datereceived")] <- "Dateofprocedure"
Mypath$Dateofprocedure <- as.Date(Mypath$Dateofprocedure)
save(Mypath, file = "/home/rstudio/EndoMineR/data/Mypath.rda")
}
#' ColonEndoRaw
#'
#' Creates raw Pathology reports
#' @param x None needed
#' @keywords Pathology reports
#' @import randomNames
#' @import generator
#' @export
#' @examples
#' ColonEndoRaw(x)
ColonEndoRaw <- function(x) {
# Date_of_Procedure <- generator::r_date_of_births(samplenumber,
# start = as.Date("2001-01-01"), end = as.Date("2017-01-01"))
Date <- paste("Date of procedure: ", Date_of_ProcedureAll)
EndoscopistList <- as.list(sample(randomNames::randomNames(
samplenumber,
"first", "last"
), 10, replace = T))
Second_EndoscopistList <- as.list(sample(randomNames::randomNames(
samplenumber,
"first", "last"
), 10, replace = T))
Endoscopist <- replicate(samplenumber, paste("Endoscopist: Dr. ",
sample(EndoscopistList, 1, replace = F),
sep = ""
))
Second_Endoscopist <- replicate(samplenumber, paste("2nd Endoscopist: Dr. ",
sample(Second_EndoscopistList, 1, replace = F),
sep = ""
))
MedicationsFent <- replicate(samplenumber, paste(
"Medications: Fentanyl ",
sample(list(
x = "12.5mcg", x = "25mcg", x = "50mcg",
x = "75mcg", x = "100mcg", x = "125mcg",
x = "150mcg"
), 1, replace = F)
))
MedicationsMidaz <- replicate(samplenumber, paste(
"Midazolam ",
sample(list(
x = "1mg", x = "2mg", x = "3mg",
x = "4mg", x = "5mg", x = "6mg", x = "7mg"
),
1,
replace = F
)
))
Instrument <- replicate(samplenumber, paste(
"Instrument: ",
sample(list(
x = "FC1", x = "FC2", x = "FC3",
x = "FC4", x = "FC5", x = "FC6", x = "FC7"
),
1,
replace = F
)
))
Extent_of_Exam <- replicate(samplenumber, paste(
"Extent of Exam: ",
sample(list(
x = "Failed intubation", x = "Recum",
x = "Sigmoid", x = "Descending Colon",
x = "Transverse Colon", x = "Ascending Colon",
x = "Caecum"
), 1, replace = F)
))
# Import the Findings text from data folder - but
# how to get it there?
INDICATIONS_FOR_EXAMINATION <- replicate(
samplenumber,
paste("Indications:", sample(list(
x1 = "Therapeutic- Dilatation",
x2 = "Other-", x3 = "Diarrrhoea", x4 = "Weight loss",
x5 = "IBD Surveillance", x6 = "PR Bleeding",
x7 = "Family History CRC", x8 = "Nausea and/or Vomiting",
x9 = "Abnormal Imaging", x10 = "Planned polypectomy",
x11 = "Fe deficiency anaemia", x12 = "Chronic abdominal pain"
),
1,
replace = F
))
)
PROCEDURE_PERFORMED <- "Procedure Performed: Colonoscopy"
FINDINGS <- read.table("/home/rstudio/EndoMineR/data-raw/data/FindingsTextColon.txt",
header = T, stringsAsFactors = F
)
FINDINGS <- replicate(samplenumber, paste(
"Findings: ",
stringr::str_c(as.list(sample(FINDINGS$x, sample(1:10),
replace = T
)), collapse = ",")
))
ENDOSCOPIC_DIAGNOSIS <- data.frame(c(
"Ulcer- Oesophageal. ",
"Post chemo-radiotherapy stricture ", "Possible achalasia.",
"Oesophagitis. ", "Food bolus obstructing the oesophagus.",
"Hiatus Hernia. ", "Extensive neoplastic looking esophageal lesion. ",
"Esophageal candidiasis ", "Barretts oesophagus. ",
"Gastritis"
), stringsAsFactors = F)
names(ENDOSCOPIC_DIAGNOSIS) <- "x"
ENDOSCOPIC_DIAGNOSIS <- replicate(
samplenumber,
paste("Endoscopic Diagnosis: ", stringr::str_c(as.list(sample(ENDOSCOPIC_DIAGNOSIS$x,
sample(1:3),
replace = F
)), collapse = ","))
)
# Now put it all together in one long text to
# simulate a real Endoscopic report
TheOGDReport <- data.frame(
NHS_Trust, HospitalNumberID,
Patient_Name, GeneralPractictioner, Date, Endoscopist,
Second_Endoscopist, MedicationsFent, MedicationsMidaz,
Instrument, Extent_of_Exam, INDICATIONS_FOR_EXAMINATION,
PROCEDURE_PERFORMED, FINDINGS, ENDOSCOPIC_DIAGNOSIS
)
# Now paste the OGD report dataframe together to
# make the fake report:
TheOGDReportFinal <- tidyr::unite(TheOGDReport,
cat(paste(colnames(TheOGDReport), collapse = "\n")),
colnames(TheOGDReport),
sep = "\n"
)
names(TheOGDReportFinal) <- "OGDReportWhole"
ColonFinal <- TheOGDReportFinal
# load(file = "ColonFinal.rda")
save(ColonFinal, file = "/home/rstudio/EndoMineR/data/ColonFinal.rda")
return(ColonFinal)
}
#' ColonpathRep
#'
#' Creates raw Pathology reports
#' @param x None needed
#' @import stringr
#' @keywords Pathology reports
#' @export
#' @examples
#' ColonpathRep(x)
ColonpathRep <- function(x) {
# Date_of_Procedure <- generator::r_date_of_births(samplenumber,
# start = as.Date("2001-01-01"), end = as.Date("2017-01-01"))
# Date <- paste("Date received: ", Date_of_Procedure +
# replicate(samplenumber, as.numeric(sample(1:10)),
# 1))
Date <- Date_of_ProcedureAll + sample(1:12, 1)
Date <- paste("Date received: ", Date)
# Clinical Details
ClinDet <- read.table("./data-raw/data/Histopath_ClinDetPhrasesColon.txt",
header = F, stringsAsFactors = F
)
ClinDet <- replicate(samplenumber, paste(
"Clinical Details: ",
stringr::str_c(as.list(sample(ClinDet$V1, sample(1:10),
replace = T
)), collapse = ",")
))
# Nature of the specimen
NatureOfSpec <- read.table("./data-raw/data/Histopath_MacDescripPhrasesColon.txt",
header = F, stringsAsFactors = F
)
NatureOfSpec <- replicate(samplenumber, paste(
sample(1:10, 1, replace = T), "specimen. Nature of specimen: ",
stringr::str_c(as.list(sample(NatureOfSpec$V1, sample(1:10), replace = T)), collapse = ",")
))
MacDescrip <- unlist(replicate(samplenumber, paste(
"Macroscopic description:",
sample(1:10, 1), "specimens collected the largest measuring",
sample(1:5, 1), "x", sample(1:5, 1), "x", sample(
1:5,
1
), "mm and the smallest", sample(
1:5,
1
), "x", sample(1:5, 1), "x", sample(
1:5,
1
), "mm"
), simplify = FALSE))
# Merge the strings together randomly
# MacDescrip<-replicate(1000,paste
# (sample(list.of.samples,1,replace=F),paste('Diagnoses',stringr::stringr::str_c(sample(line,sample(3:10,1),replace=F),collapse='.'))))
# Histol Details
Histol <- read.table("./data-raw/data/HistolTextColon",
header = F, stringsAsFactors = F
)
Histol <- replicate(samplenumber, paste(
"Histology: ",
stringr::str_c(as.list(sample(Histol$V1, sample(1:10),
replace = T
)), collapse = ",")
))
# Diagnostic details
Diagnostic <- read.table("./data-raw/data/Histopath_DxRawColon.txt",
header = F, stringsAsFactors = F
)
Diagnostic <- replicate(samplenumber, paste(
"Diagnosis: ",
stringr::str_c(as.list(sample(Diagnostic$V1, sample(5:10),
replace = T
)), collapse = ",")
))
PathDataFrameReport <- data.frame(
NHS_Trust, HospitalNumberID,
Patient_Name, Date_of_Birth, GeneralPractictioner,
Date, ClinDet, NatureOfSpec, MacDescrip, Histol,
Diagnostic
)
PathDataFrameFinalColon <- tidyr::unite(PathDataFrameReport,
cat(paste(colnames(PathDataFrameReport), collapse = "\n")),
colnames(PathDataFrameReport),
sep = "\n"
)
names(PathDataFrameFinalColon) <- "PathReportWhole"
# load(file = "./data_raw/data/PathDataFrameFinalColon.rda")
save(PathDataFrameFinalColon, file = "/home/rstudio/EndoMineR/data/PathDataFrameFinalColon.rda")
return(PathDataFrameFinalColon)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.