##### Fake Data Creation #####
# Endoscopies() creates a spreadsheet of Endoscopy data, Histop_df() creates a dataframe of
# Histopathology data. Endomerge() associates them together
# EndoRaw() creates an Endoscopy report (so the data has not already been extracted) for upper GI
# ColonEndoRaw() creates an Endoscopy report (so the data has not already been extracted) for lower GI
# pathRep() creates a Pathology report (so the data has not already been extracted) for upper GI
# ColonpathRep() creates a Pathology report (so the data has not already been extracted) for lower GI
#' FakeEndoCreator
#'
#' Creates fake endoscopic data to play with as a spreadsheet format. It assumes
#' that some of the data has already been separated out. EndoRaw() for upper GI
#' and ColonEndoRaw() for lower GI are better functions for the real
#' scenario of just getting the report as a series of unextracted text files.
#' (Their histology equivalents are pathRep and ColonpathRep respectively)
#'
#' Every report is first pasted together as one free-text string and then
#' split back into columns with regular expressions.
#' @param x None needed. The argument is ignored and only kept so that
#'   existing calls keep working.
#' @return A data frame with 1000 rows and the columns HospNum_Id,
#'   EndoReports (the whole free-text report), Date (class Date),
#'   Endoscopist (name without the "Dr" title), Midazolam (numeric, taken
#'   from the "mg" value), Fentanyl (numeric, taken from the "mcg" value),
#'   Indication, Diagnosis, BarrC and BarrM (the numbers following "C" and
#'   "M" in the Barrett's length; NA when the report has no length).
#' @keywords Fake endoscopy
#' @export
#' @examples Endoscopies()
Endoscopies <- function(x) {
  n_reports <- 1000

  # Pools the random reports are drawn from
  endoscopists <- c(
    "Dr Jonny Begood", "Dr Elvis Presley", "Dr Bilbo Baggins",
    "Dr Elmo Fudd", "Dr Jimminey Cricket", "Dr Davy Jones",
    "Dr Bugs Bunny", "Dr Rara Rasputin", "Dr Chubby Checker",
    "Dr Frank Sinatra", "Dr Charles Dickens", "Dr Joseph Conrad",
    "Dr Florence Nightingale", "Dr Sal Addin", "Dr King Richard III"
  )
  midazolam_doses <- c("1mg", "2mg", "3mg", "4mg", "5mg", "6mg", "7mg", "8mg")
  fentanyl_doses <- c(
    "12.5mcg", "25mcg", "50mcg", "75mcg", "100mcg", "125mcg", "150mcg"
  )
  indications <- c(
    "Therapeutic- Dilatation",
    "Other-", "Follow-up ULCER HEALING",
    "Haematemesis or Melaena/Blood PR", "Previous OGD ? 8 months ago",
    "Dysphagia/Odynophagia", "Surveillance-Barrett's",
    "Nausea and/or Vomiting", "Weight Loss",
    "Dysphagia/intermittent for a few months",
    "Other-", "Small Bowel Biopsy",
    "Dyspepsia", "Reflux-like Symptoms/Atypical Chest Pain",
    "chronic abdo pain and constipaton",
    "Oesophagus- Dysplasia", "Therapeutic- RFA"
  )
  diagnoses <- c(
    "Ulcer- Oesophageal. ",
    "Post chemo-radiotherapy stricture ",
    "Possible achalasia.", "Oesophagitis. ",
    "Food bolus obstructing the oesophagus.",
    "Hiatus Hernia. ", "Extensive neoplastic looking esophageal lesion. ",
    "Esophageal candidiasis ", "Barretts oesophagus. ",
    "Gastritis"
  )
  procedure_dates <- sample(
    seq(as.Date("2013/01/01"), as.Date("2017/05/01"), by = "day"),
    n_reports
  )
  hosp_nums <- sample(
    c(
      "P433224", "P633443", "K522332", "G244224", "S553322", "D0739033",
      "U873352", "P223333", "Y763634", "I927282", "P223311", "P029834",
      "U22415", "U234252", "S141141", "O349253", "T622722", "J322909",
      "F630230", "T432452"
    ),
    n_reports,
    replace = TRUE
  )
  # Yes I know... This was just easier..
  barretts_lengths <- c(
    "C0M1", "C0M2", "C0M3", "C0M4", "C0M5", "C0M6", "C0M7", "C0M8",
    "C0M9", "C0M10", "C1M2", "C1M3", "C1M4", "C1M5", "C1M6", "C1M7",
    "C1M8", "C1M9", "C1M10", "C2M3", "C2M4", "C2M5", "C2M6", "C2M7",
    "C2M8", "C2M9", "C2M10", "C3M4", "C3M5", "C3M6", "C3M7", "C3M8",
    "C3M9", "C3M10", "C4M5", "C4M6", "C4M7", "C4M8", "C4M9", "C4M10",
    "C5M6", "C5M7", "C5M8", "C5M9", "C5M10", "C6M7", "C6M8", "C6M9"
  )

  # Build one free-text report per row. Roughly half of the reports end with
  # a Barrett's length, the rest with an empty string.
  report_text <- replicate(n_reports, paste(
    "Date of Procedure", sample(procedure_dates, 1, replace = FALSE),
    " Endoscopist: ", sample(endoscopists, 1, replace = FALSE),
    "Midazolam: ", sample(midazolam_doses, 1, replace = FALSE),
    "Fentanyl: ", sample(fentanyl_doses, 1, replace = FALSE),
    "Indication:", sample(indications, 1, replace = FALSE),
    "Diagnosis:", stringr::str_c(
      sample(diagnoses, sample(1:10, 1), replace = FALSE),
      collapse = "."
    ),
    sample(
      c("", paste("Barrett's oesophagus length:", sample(barretts_lengths, 1))),
      1
    )
  ))

  # Merge them all together into a dataframe
  endo_df <- data.frame(hosp_nums, report_text)
  names(endo_df) <- c("HospNum_Id", "EndoReports")
  reports <- endo_df$EndoReports

  ######### Data accordionisation: cut the text between consecutive headings
  date_raw <- stringr::str_extract(reports, "Date of Procedure.*Endoscopist")
  endoscopist_raw <- stringr::str_extract(reports, "Endoscopist:.*Midazolam")
  midazolam_raw <- stringr::str_extract(reports, "Midazolam:.*Fentanyl")
  fentanyl_raw <- stringr::str_extract(reports, "Fentanyl:.*Indication")
  indication_raw <- stringr::str_extract(reports, "Indication:.*Diagnosis")
  diagnosis_raw <- stringr::str_extract(reports, "Diagnosis:.*")
  barretts_raw <- stringr::str_extract(reports, " oesophagus length: C.*M.*")

  ######### Data cleaning and formatting of the columns
  # Surrounding blanks are trimmed before the date conversion.
  date_txt <- gsub("Date of Procedure", "", date_raw)
  date_txt <- gsub(" Endoscopist", "", date_txt)
  endo_df$Date <- as.Date(trimws(date_txt), format = "%Y-%m-%d")

  # paste() puts its own separator after " Endoscopist: ", so the title is
  # preceded by two blanks; allow any amount of whitespace when removing it.
  endoscopist_txt <- gsub("Endoscopist:\\s*Dr ", "", endoscopist_raw)
  endo_df$Endoscopist <- trimws(gsub("Midazolam", "", endoscopist_txt))

  # Doses are converted to numeric columns at the same time
  midazolam_txt <- gsub("Midazolam: ", "", midazolam_raw)
  endo_df$Midazolam <- as.numeric(gsub("mg Fentanyl", "", midazolam_txt))
  fentanyl_txt <- gsub("Fentanyl: ", "", fentanyl_raw)
  endo_df$Fentanyl <- as.numeric(gsub("mcg Indication", "", fentanyl_txt))

  indication_txt <- gsub("Indication: ", "", indication_raw)
  endo_df$Indication <- gsub(" Diagnosis", "", indication_txt)

  # Strip the heading itself; the rest of the text (including any trailing
  # Barrett's length) is kept as it was.
  endo_df$Diagnosis <- sub("^Diagnosis:\\s*", "", diagnosis_raw)

  # "C<number>M<number>": the part before "M" gives BarrC, the part from "M"
  # onwards gives BarrM. Reports without a length yield NA in both columns.
  barr_c_txt <- gsub("oesophagus length: ", "", barretts_raw)
  barr_c_txt <- gsub("M.*", "", barr_c_txt)
  endo_df$BarrC <- as.numeric(gsub("C", "", barr_c_txt))
  barr_m_txt <- stringr::str_extract(barretts_raw, "M.*")
  endo_df$BarrM <- as.numeric(gsub("M", "", barr_m_txt))

  endo_df
}
#' FakeHistolCreator
#'
#' Creates fake histology data to play with. The hospital numbers and the
#' dates are taken from a fresh call to Endoscopies() so that the two data
#' sets can later be merged: every histology date is the endoscopy date of
#' the same row plus 0 to 2 days.
#' @param x None needed. The argument is ignored and only kept so that
#'   existing calls keep working.
#' @return A data frame with 1000 rows and the columns HospNum_Id,
#'   HistoReport (the whole free-text report), Date (class Date), Macro and
#'   Diagnoses.
#' @keywords Fake histology spreadsheet data
#' @export
#' @examples Histop_df()
Histop_df <- function(x) {
  n_reports <- 1000

  # Generate a load of strings
  phrases <- c(
    "Intestinal metaplasia is present.",
    "Basal hyperplasia is prominent", "There is no dysplasia or malignancy.",
    "No Helicobacter are seen.", "There is some ulceration.",
    "There is no intercellular oedema in the surface epithelium.",
    " PAS staining shows occasional spores, consistent with candida.",
    " No herpetic viral inclusions are seen.",
    " There is no dysplasia and no invasive carcinoma.",
    " There is mild regenerative epithelial change, but neither dysplasia nor malignancy is seen.",
    "The appearances are consistent with the endoscopic diagnosis of Barrett's oesophagus with active chronic inflammation.",
    "The biopsies of oesophageal squamous mucosa show surface erosion and active chronic inflammation.",
    "Numerous Candida spores and hyphae are present admixed with ulcer slough.",
    "There is reactive basal cell hyperplasia and mild inflammatory epithelial atypia.",
    "There is no significant increase in intraepithelial eosinophils.",
    "No granulomas or viral inclusions are seen.",
    "The appearances are those of Candida oesophagitis.",
    "Neither dysplasia nor malignancy is seen.",
    "The appearances are consistent with, but not specific for Barrett's (columnar lined) oesophagus.",
    "High grade dysplasia is present throughout this sample",
    "There is low grade dysplasia", "This is a dysplastic sample"
  )

  # Pool of macroscopic descriptions with random specimen counts and sizes
  macro_pool <- replicate(n_reports, paste(
    "Macrosopic description:",
    sample(1:10, 1), "specimens collected the largest measuring",
    sample(1:5, 1), "x", sample(1:5, 1), "x", sample(1:5, 1),
    "mm and the smallest",
    sample(1:5, 1), "x", sample(1:5, 1), "x", sample(1:5, 1), "mm"
  ), simplify = FALSE)

  # Merge the strings together randomly: one macroscopic description followed
  # by 3 to 10 diagnostic phrases
  histop <- replicate(n_reports, paste(
    sample(macro_pool, 1, replace = FALSE),
    paste("Diagnoses", stringr::str_c(
      sample(phrases, sample(3:10, 1), replace = FALSE),
      collapse = "."
    ))
  ))

  # Endoscopies is a function, so it has to be called to obtain the data;
  # subsetting the function itself with $ is an error.
  endo <- Endoscopies()

  # Because we eventually will merge histopath and endoscopy together the
  # histopath dates are generated from the endoscopy dates with 0-2 days
  # difference (drawn independently for every row).
  received <- endo$Date + sample(0:2, nrow(endo), replace = TRUE)

  # Hospital numbers come from the same endoscopy rows
  Histop_df <- data.frame(
    endo$HospNum_Id,
    paste("Date received:", received, histop)
  )
  names(Histop_df) <- c("HospNum_Id", "HistoReport")

  ######### Data accordionisation: cut the text between consecutive headings
  date_raw <- stringr::str_extract(
    Histop_df$HistoReport,
    "Date received:.*Macrosopic description:"
  )
  macro_raw <- stringr::str_extract(
    Histop_df$HistoReport,
    "Macrosopic description:.*Diagnoses"
  )
  diagnoses_raw <- stringr::str_extract(Histop_df$HistoReport, "Diagnoses.*")

  ######### Data cleaning and formatting of the columns
  date_txt <- gsub("Date received: ", "", date_raw)
  date_txt <- gsub("Macrosopic description:", "", date_txt)
  Histop_df$Date <- as.Date(trimws(date_txt), format = "%Y-%m-%d")

  macro_txt <- gsub("Macrosopic description: ", "", macro_raw)
  Histop_df$Macro <- gsub("Diagnoses", "", macro_txt)

  Histop_df$Diagnoses <- gsub("Diagnoses", "", diagnoses_raw)

  Histop_df
}
######### Data merging We can merge straight away as we
######### have the same names for the columns date and
######### HospNum_Id so no need to mess around. We will use
######### the fuzzyjoin method as there is sometimes a gap
######### between the endoscopy date and the date that the
######### histopathology was received:
# Shared building blocks for the raw-report generators below. They are
# created once at top level, so every generator reuses the same patients and
# procedure dates.

# Number of raw reports each generator produces
samplenumber <- 2000

# Only (samplenumber - 1900) numbers are drawn; the 26 shuffled letters are
# recycled by paste0() to that length, and data.frame() later recycles the
# result to samplenumber rows, so hospital numbers repeat across reports.
HospitalNumberID <- paste0(
  "Hospital Number: ",
  sample(LETTERS),
  sample(1e+06:9999999, samplenumber - 1900, replace = TRUE)
)

# The same trust heading on every report
NHS_Trust <- rep("Hospital: Random NHS Foundation Trust", samplenumber)

# NOTE(review): "first" and "last" are passed positionally to
# randomNames::randomNames(); confirm they reach the intended arguments.
Patient_Name <- paste(
  "Patient Name: ",
  randomNames::randomNames(samplenumber, "first", "last")
)

Date_of_Birth <- paste(
  "DOB: ",
  generator::r_date_of_births(
    samplenumber,
    start = as.Date("1900-01-01"),
    end = as.Date("1999-01-01")
  )
)

GeneralPractictioner <- paste0(
  "General Practitioner: Dr. ",
  randomNames::randomNames(samplenumber, "first", "last")
)

# Procedure dates shared by the endoscopy and pathology generators
Date_of_ProcedureAll <- generator::r_date_of_births(
  samplenumber,
  start = as.Date("2001-01-01"),
  end = as.Date("2017-01-01")
)
#' EndoRaw2
#'
#' Generates fake raw upper GI endoscopy (OGD) reports. Every report is one
#' block of text whose lines are the individual report fields. The raw
#' reports are saved as TheOGDReportFinal.rda and a version with the fields
#' extracted into columns is saved as Myendo.rda. Takes no arguments.
#'
#' The function reads the findings phrases from, and writes its results to,
#' hard-coded paths below /home/rstudio/EndoMineR, and it uses the file-level
#' objects samplenumber, NHS_Trust, HospitalNumberID, Patient_Name,
#' GeneralPractictioner and Date_of_ProcedureAll.
#' @return NULL, invisibly. The function is called for its side effect of
#'   writing the two .rda files.
#' @keywords fake endoscopy data
#' @import randomNames
#' @import generator
#' @export
#' @examples
#' \dontrun{
#' EndoRaw2()
#' }
EndoRaw2 <- function() {
  # One line of text per report: a prefix followed by one random choice
  pick_one <- function(prefix, choices) {
    replicate(samplenumber, paste(prefix, sample(choices, 1, replace = FALSE)))
  }

  Date <- paste("Date of procedure: ", Date_of_ProcedureAll)

  # Ten names each form the pool of first and second endoscopists
  endoscopist_pool <- sample(
    randomNames::randomNames(samplenumber, "first", "last"),
    10,
    replace = TRUE
  )
  second_endoscopist_pool <- sample(
    randomNames::randomNames(samplenumber, "first", "last"),
    10,
    replace = TRUE
  )
  Endoscopist <- replicate(samplenumber, paste0(
    "Endoscopist: Dr. ",
    sample(endoscopist_pool, 1, replace = FALSE)
  ))
  Second_Endoscopist <- replicate(samplenumber, paste0(
    "2nd Endoscopist: Dr. ",
    sample(second_endoscopist_pool, 1, replace = FALSE)
  ))

  MedicationsFent <- pick_one(
    "Medications: Fentanyl ",
    c("12.5mcg", "25mcg", "50mcg", "75mcg", "100mcg", "125mcg", "150mcg")
  )
  MedicationsMidaz <- pick_one(
    "Midazolam ",
    c("1mg", "2mg", "3mg", "4mg", "5mg", "6mg", "7mg")
  )
  Instrument <- pick_one(
    "Instrument: ",
    c("FG1", "FG2", "FG3", "FG4", "FG5", "FG6", "FG7")
  )
  Extent_of_Exam <- pick_one(
    "Extent of Exam: ",
    c("Failed intubation", "Oesophagus", "Stomach body", "D1", "D2",
      "Pylorus", "GOJ")
  )
  INDICATIONS_FOR_EXAMINATION <- pick_one(
    "Indications:",
    c("Therapeutic- Dilatation",
      "Other-", "Follow-up ULCER HEALING",
      "Haematemesis or Melaena/Blood PR",
      "Previous OGD ? 8 months ago", "Dysphagia/Odynophagia",
      "Surveillance-Barrett's", "Nausea and/or Vomiting",
      "Weight Loss", "Dysphagia/intermittent for a few months",
      "Other-", "Small Bowel Biopsy",
      "Dyspepsia", "Reflux-like Symptoms/Atypical Chest Pain",
      "chronic abdo pain and constipaton",
      "Oesophagus- Dysplasia", "Therapeutic- RFA")
  )
  PROCEDURE_PERFORMED <- "Procedure Performed: Gastroscopy (OGD)"

  # Findings: 1 to 10 phrases (with replacement) from the phrase file. The
  # size is drawn as a single number; the earlier sample(1:10) handed a whole
  # permutation to the size argument.
  # NOTE(review): absolute path, only valid on the original build machine.
  findings_pool <- read.table(
    "/home/rstudio/EndoMineR/data-raw/data/FindingsText",
    header = TRUE, stringsAsFactors = FALSE
  )
  FINDINGS <- replicate(samplenumber, paste(
    "Findings: ",
    stringr::str_c(
      sample(findings_pool$x, sample(1:10, 1), replace = TRUE),
      collapse = ","
    )
  ))

  # Most entries are empty, so only a minority of reports mention a therapy
  TherapyorNot <- replicate(samplenumber, paste(sample(
    c("Therapeutic- Dilatation was performed",
      "", "HALO 90 done with good effect",
      "TTS HALO to area",
      "", "",
      "A lesion underwent EMR", "",
      "", "",
      "", "Area APC'd",
      "", "",
      "",
      "", "Therapeutic- RFA", "",
      "", "", "", "", "", "", ""),
    1,
    replace = FALSE
  )))

  # 1 to 3 distinct diagnoses per report
  diagnosis_pool <- c(
    "Ulcer- Oesophageal. ",
    "Post chemo-radiotherapy stricture ", "Possible achalasia.",
    "Oesophagitis. ", "Food bolus obstructing the oesophagus.",
    "Hiatus Hernia. ", "Extensive neoplastic looking esophageal lesion. ",
    "Esophageal candidiasis ", "Barretts oesophagus. ",
    "Gastritis"
  )
  ENDOSCOPIC_DIAGNOSIS <- replicate(samplenumber, paste(
    "Endoscopic Diagnosis: ",
    stringr::str_c(
      sample(diagnosis_pool, sample(1:3, 1), replace = FALSE),
      collapse = ","
    )
  ))

  # Now put it all together in one long text to simulate a real Endoscopic
  # report. Shorter vectors are recycled by data.frame().
  TheOGDReport <- data.frame(
    NHS_Trust, HospitalNumberID,
    Patient_Name, GeneralPractictioner, Date, Endoscopist,
    Second_Endoscopist, MedicationsFent, MedicationsMidaz,
    Instrument, Extent_of_Exam, INDICATIONS_FOR_EXAMINATION,
    PROCEDURE_PERFORMED, FINDINGS, TherapyorNot, ENDOSCOPIC_DIAGNOSIS
  )

  # Paste all columns of a row together, one field per line. The new column
  # is named directly instead of passing a cat() call as the column name.
  TheOGDReportFinal <- tidyr::unite(
    TheOGDReport, "OGDReportWhole", colnames(TheOGDReport),
    sep = "\n"
  )
  # save() stores the object under its variable name, so the name must stay.
  save(TheOGDReportFinal,
    file = "/home/rstudio/EndoMineR/data/TheOGDReportFinal.rda")

  # Extracted version of the same reports
  Myendo <- TheOGDReportFinal
  Myendo$OGDReportWhole <- gsub(
    "2nd Endoscopist:", "Second endoscopist:",
    Myendo$OGDReportWhole
  )
  EndoscTree <- c(
    "Hospital Number:", "Patient Name:", "General Practitioner:",
    "Date of procedure:", "Endoscopist:", "Second endoscopist:", "Medications",
    "Instrument", "Extent of Exam:", "Indications:", "Procedure Performed:",
    "Findings:", "Endoscopic Diagnosis:"
  )
  # Extractor() is defined elsewhere in the package; presumably it copies the
  # text between two consecutive headings into a new column - confirm there.
  for (i in seq_len(length(EndoscTree) - 1)) {
    Myendo <- Extractor(
      Myendo, "OGDReportWhole", EndoscTree[i],
      EndoscTree[i + 1], EndoscTree[i]
    )
  }
  # NOTE(review): the Dateofprocedure column is assumed to be created by
  # Extractor() from the "Date of procedure:" heading.
  Myendo$Dateofprocedure <- as.Date(Myendo$Dateofprocedure)
  save(Myendo, file = "/home/rstudio/EndoMineR/data/Myendo.rda")
  invisible(NULL)
}
#' pathRep2
#'
#' Creates fake raw upper GI pathology reports. Every report is one block of
#' text whose lines are the individual report fields. The raw reports are
#' saved as PathDataFrameFinal.rda and a version with the fields extracted
#' into columns is saved as Mypath.rda. Takes no arguments.
#'
#' The phrase files are read relative to the working directory
#' (./data-raw/data), the results are written to hard-coded paths below
#' /home/rstudio/EndoMineR, and the file-level objects samplenumber,
#' NHS_Trust, HospitalNumberID, Patient_Name, Date_of_Birth,
#' GeneralPractictioner and Date_of_ProcedureAll are used.
#' @return NULL, invisibly. The function is called for its side effect of
#'   writing the two .rda files.
#' @import stringr
#' @import generator
#' @keywords Pathology reports
#' @export
#' @examples
#' \dontrun{
#' pathRep2()
#' }
pathRep2 <- function() {
  # A heading followed by a random number of comma-separated phrases drawn
  # with replacement. The size is drawn as a single number; the earlier
  # sample(1:10) handed a whole permutation to the size argument.
  phrase_field <- function(prefix, phrases, sizes) {
    replicate(samplenumber, paste(
      prefix,
      stringr::str_c(
        sample(phrases, sample(sizes, 1), replace = TRUE),
        collapse = ","
      )
    ))
  }

  # One accession number per report, sized by samplenumber rather than a
  # hard-coded 2000 and a silently recycled 90-element permutation.
  AccessionNum <- paste0(
    "SP-", sample(10:99, samplenumber, replace = TRUE),
    "-", sample(1000000:9999999, samplenumber, replace = FALSE)
  )

  # A single offset of 1 to 12 days is added to every procedure date
  Date <- Date_of_ProcedureAll + sample(1:12, 1)
  Date <- paste("Date received: ", Date)

  # Clinical Details
  clin_det_pool <- read.table("./data-raw/data/HistolClinDetText",
    header = TRUE, stringsAsFactors = FALSE)
  ClinDet <- phrase_field("Clinical Details: ", clin_det_pool$x, 1:10)

  # Nature of the specimen
  nature_pool <- read.table("./data-raw/data/HistolMacDescription.txt",
    header = TRUE, stringsAsFactors = FALSE)
  NatureOfSpec <- replicate(samplenumber, paste(
    sample(1:10, 1, replace = TRUE), "specimen. Nature of specimen: ",
    stringr::str_c(
      sample(nature_pool$x, sample(1:10, 1), replace = TRUE),
      collapse = ","
    )
  ))

  # Macroscopic description with random specimen counts and sizes
  MacDescrip <- replicate(samplenumber, paste(
    "Macroscopic description:",
    sample(1:10, 1), "specimens collected the largest measuring",
    sample(1:5, 1), "x", sample(1:5, 1), "x", sample(1:5, 1),
    "mm and the smallest",
    sample(1:5, 1), "x", sample(1:5, 1), "x", sample(1:5, 1), "mm"
  ))

  # Histol Details
  histol_pool <- read.table("./data-raw/data/HistolText",
    header = TRUE, stringsAsFactors = FALSE)
  Histol <- phrase_field("Histology: ", histol_pool$x, 1:10)

  # Diagnostic details
  diagnostic_pool <- read.table("./data-raw/data/HistolDxText",
    header = TRUE, stringsAsFactors = FALSE)
  Diagnostic <- phrase_field("Diagnosis: ", diagnostic_pool$x, 5:10)

  # Shorter vectors are recycled by data.frame()
  PathDataFrameReport <- data.frame(
    AccessionNum, NHS_Trust, HospitalNumberID,
    Patient_Name, Date_of_Birth, GeneralPractictioner,
    Date, ClinDet, NatureOfSpec, MacDescrip, Histol,
    Diagnostic
  )

  # Paste all columns of a row together, one field per line. The new column
  # is named directly instead of passing a cat() call as the column name.
  PathDataFrameFinal <- tidyr::unite(
    PathDataFrameReport, "PathReportWhole", colnames(PathDataFrameReport),
    sep = "\n"
  )
  # save() stores the object under its variable name, so the name must stay.
  save(PathDataFrameFinal,
    file = "/home/rstudio/EndoMineR/data/PathDataFrameFinal.rda")

  # Extracted version of the same reports
  Mypath <- PathDataFrameFinal
  HistolTree <- c(
    "Hospital Number", "Patient Name", "DOB:",
    "General Practitioner:",
    "Date received:", "Clinical Details:", "Macroscopic description:",
    "Histology:", "Diagnosis:", ""
  )
  # Extractor() is defined elsewhere in the package; presumably it copies the
  # text between two consecutive headings into a new column - confirm there.
  for (i in seq_len(length(HistolTree) - 1)) {
    Mypath <- Extractor(
      Mypath, "PathReportWhole", HistolTree[i],
      HistolTree[i + 1], HistolTree[i]
    )
  }
  # Use the same date column name as the endoscopy data
  colnames(Mypath)[which(names(Mypath) == "Datereceived")] <- "Dateofprocedure"
  Mypath$Dateofprocedure <- as.Date(Mypath$Dateofprocedure)
  save(Mypath, file = "/home/rstudio/EndoMineR/data/Mypath.rda")
  invisible(NULL)
}
#' ColonEndoRaw
#'
#' Creates fake raw lower GI endoscopy (colonoscopy) reports. Every report
#' is one block of text whose lines are the individual report fields. The
#' result is also saved as ColonFinal.rda.
#'
#' The function reads the findings phrases from, and writes its result to,
#' hard-coded paths below /home/rstudio/EndoMineR, and it uses the file-level
#' objects samplenumber, NHS_Trust, HospitalNumberID, Patient_Name,
#' GeneralPractictioner and Date_of_ProcedureAll.
#' @param x None needed. The argument is ignored and only kept so that
#'   existing calls keep working.
#' @return A data frame with the single column OGDReportWhole holding one
#'   report per row.
#' @keywords Endoscopy reports
#' @import randomNames
#' @import generator
#' @export
#' @examples
#' \dontrun{
#' ColonEndoRaw()
#' }
ColonEndoRaw <- function(x) {
  # One line of text per report: a prefix followed by one random choice
  pick_one <- function(prefix, choices) {
    replicate(samplenumber, paste(prefix, sample(choices, 1, replace = FALSE)))
  }

  Date <- paste("Date of procedure: ", Date_of_ProcedureAll)

  # Ten names each form the pool of first and second endoscopists
  endoscopist_pool <- sample(
    randomNames::randomNames(samplenumber, "first", "last"),
    10,
    replace = TRUE
  )
  second_endoscopist_pool <- sample(
    randomNames::randomNames(samplenumber, "first", "last"),
    10,
    replace = TRUE
  )
  Endoscopist <- replicate(samplenumber, paste0(
    "Endoscopist: Dr. ",
    sample(endoscopist_pool, 1, replace = FALSE)
  ))
  Second_Endoscopist <- replicate(samplenumber, paste0(
    "2nd Endoscopist: Dr. ",
    sample(second_endoscopist_pool, 1, replace = FALSE)
  ))

  MedicationsFent <- pick_one(
    "Medications: Fentanyl ",
    c("12.5mcg", "25mcg", "50mcg", "75mcg", "100mcg", "125mcg", "150mcg")
  )
  MedicationsMidaz <- pick_one(
    "Midazolam ",
    c("1mg", "2mg", "3mg", "4mg", "5mg", "6mg", "7mg")
  )
  Instrument <- pick_one(
    "Instrument: ",
    c("FC1", "FC2", "FC3", "FC4", "FC5", "FC6", "FC7")
  )
  Extent_of_Exam <- pick_one(
    "Extent of Exam: ",
    c("Failed intubation", "Recum",
      "Sigmoid", "Descending Colon",
      "Transverse Colon", "Ascending Colon",
      "Caecum")
  )
  INDICATIONS_FOR_EXAMINATION <- pick_one(
    "Indications:",
    c("Therapeutic- Dilatation",
      "Other-", "Diarrrhoea", "Weight loss",
      "IBD Surveillance", "PR Bleeding",
      "Family History CRC", "Nausea and/or Vomiting",
      "Abnormal Imaging", "Planned polypectomy",
      "Fe deficiency anaemia", "Chronic abdominal pain")
  )
  PROCEDURE_PERFORMED <- "Procedure Performed: Colonoscopy"

  # Findings: 1 to 10 phrases (with replacement) from the phrase file. The
  # size is drawn as a single number; the earlier sample(1:10) handed a whole
  # permutation to the size argument.
  # NOTE(review): absolute path, only valid on the original build machine.
  findings_pool <- read.table(
    "/home/rstudio/EndoMineR/data-raw/data/FindingsTextColon.txt",
    header = TRUE, stringsAsFactors = FALSE
  )
  FINDINGS <- replicate(samplenumber, paste(
    "Findings: ",
    stringr::str_c(
      sample(findings_pool$x, sample(1:10, 1), replace = TRUE),
      collapse = ","
    )
  ))

  # 1 to 3 distinct diagnoses per report.
  # NOTE(review): these are the same diagnoses the upper GI generator uses;
  # confirm whether colon-specific diagnoses were intended.
  diagnosis_pool <- c(
    "Ulcer- Oesophageal. ",
    "Post chemo-radiotherapy stricture ", "Possible achalasia.",
    "Oesophagitis. ", "Food bolus obstructing the oesophagus.",
    "Hiatus Hernia. ", "Extensive neoplastic looking esophageal lesion. ",
    "Esophageal candidiasis ", "Barretts oesophagus. ",
    "Gastritis"
  )
  ENDOSCOPIC_DIAGNOSIS <- replicate(samplenumber, paste(
    "Endoscopic Diagnosis: ",
    stringr::str_c(
      sample(diagnosis_pool, sample(1:3, 1), replace = FALSE),
      collapse = ","
    )
  ))

  # Now put it all together in one long text to simulate a real Endoscopic
  # report. Shorter vectors are recycled by data.frame().
  report_fields <- data.frame(
    NHS_Trust, HospitalNumberID,
    Patient_Name, GeneralPractictioner, Date, Endoscopist,
    Second_Endoscopist, MedicationsFent, MedicationsMidaz,
    Instrument, Extent_of_Exam, INDICATIONS_FOR_EXAMINATION,
    PROCEDURE_PERFORMED, FINDINGS, ENDOSCOPIC_DIAGNOSIS
  )

  # Paste all columns of a row together, one field per line. The new column
  # is named directly instead of passing a cat() call as the column name.
  ColonFinal <- tidyr::unite(
    report_fields, "OGDReportWhole", colnames(report_fields),
    sep = "\n"
  )
  # save() stores the object under its variable name, so the name must stay.
  save(ColonFinal, file = "/home/rstudio/EndoMineR/data/ColonFinal.rda")
  return(ColonFinal)
}
#' ColonpathRep
#'
#' Creates fake raw lower GI pathology reports. Every report is one block of
#' text whose lines are the individual report fields. The result is also
#' saved as PathDataFrameFinalColon.rda.
#'
#' The phrase files are read relative to the working directory
#' (./data-raw/data), the result is written to a hard-coded path below
#' /home/rstudio/EndoMineR, and the file-level objects samplenumber,
#' NHS_Trust, HospitalNumberID, Patient_Name, Date_of_Birth,
#' GeneralPractictioner and Date_of_ProcedureAll are used.
#' @param x None needed. The argument is ignored and only kept so that
#'   existing calls keep working.
#' @return A data frame with the single column PathReportWhole holding one
#'   report per row.
#' @import stringr
#' @keywords Pathology reports
#' @export
#' @examples
#' \dontrun{
#' ColonpathRep()
#' }
ColonpathRep <- function(x) {
  # A heading followed by a random number of comma-separated phrases drawn
  # with replacement. The size is drawn as a single number; the earlier
  # sample(1:10) handed a whole permutation to the size argument.
  phrase_field <- function(prefix, phrases, sizes) {
    replicate(samplenumber, paste(
      prefix,
      stringr::str_c(
        sample(phrases, sample(sizes, 1), replace = TRUE),
        collapse = ","
      )
    ))
  }

  # A single offset of 1 to 12 days is added to every procedure date
  Date <- Date_of_ProcedureAll + sample(1:12, 1)
  Date <- paste("Date received: ", Date)

  # Clinical Details (the phrase files have no header, hence column V1)
  clin_det_pool <- read.table(
    "./data-raw/data/Histopath_ClinDetPhrasesColon.txt",
    header = FALSE, stringsAsFactors = FALSE
  )
  ClinDet <- phrase_field("Clinical Details: ", clin_det_pool$V1, 1:10)

  # Nature of the specimen
  nature_pool <- read.table(
    "./data-raw/data/Histopath_MacDescripPhrasesColon.txt",
    header = FALSE, stringsAsFactors = FALSE
  )
  NatureOfSpec <- replicate(samplenumber, paste(
    sample(1:10, 1, replace = TRUE), "specimen. Nature of specimen: ",
    stringr::str_c(
      sample(nature_pool$V1, sample(1:10, 1), replace = TRUE),
      collapse = ","
    )
  ))

  # Macroscopic description with random specimen counts and sizes
  MacDescrip <- replicate(samplenumber, paste(
    "Macroscopic description:",
    sample(1:10, 1), "specimens collected the largest measuring",
    sample(1:5, 1), "x", sample(1:5, 1), "x", sample(1:5, 1),
    "mm and the smallest",
    sample(1:5, 1), "x", sample(1:5, 1), "x", sample(1:5, 1), "mm"
  ))

  # Histol Details
  histol_pool <- read.table("./data-raw/data/HistolTextColon",
    header = FALSE, stringsAsFactors = FALSE)
  Histol <- phrase_field("Histology: ", histol_pool$V1, 1:10)

  # Diagnostic details
  diagnostic_pool <- read.table("./data-raw/data/Histopath_DxRawColon.txt",
    header = FALSE, stringsAsFactors = FALSE)
  Diagnostic <- phrase_field("Diagnosis: ", diagnostic_pool$V1, 5:10)

  # Shorter vectors are recycled by data.frame()
  PathDataFrameReport <- data.frame(
    NHS_Trust, HospitalNumberID,
    Patient_Name, Date_of_Birth, GeneralPractictioner,
    Date, ClinDet, NatureOfSpec, MacDescrip, Histol,
    Diagnostic
  )

  # Paste all columns of a row together, one field per line. The new column
  # is named directly instead of passing a cat() call as the column name.
  PathDataFrameFinalColon <- tidyr::unite(
    PathDataFrameReport, "PathReportWhole", colnames(PathDataFrameReport),
    sep = "\n"
  )
  # save() stores the object under its variable name, so the name must stay.
  save(PathDataFrameFinalColon,
    file = "/home/rstudio/EndoMineR/data/PathDataFrameFinalColon.rda")
  return(PathDataFrameFinalColon)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.