library(dplyr)
library(stringr)
library(here)
library(EndoMineR)
library(readxl)
library(tidyr)
library(PhysiMineR)
library(finalfit)
library(knitr)
library(kableExtra)
library(cutpointr)
library(ggbeeswarm)
library(ggplot2)
library(gridExtra)
library(gtsummary)
library(gt)
library(matrixStats)
library(ggpubr)
library(flextable)
library(knitr)
library(here)
library(reshape)
#library(checkpoint)
#checkpoint("2020-08-14")
##### Import ####
## @knitr dataImport
oldw <- getOption("warn")
options(warn = -1)
#Get this from the data folder for PhysiMineR- just click on the RData to load into
#the global environment
#Or if using the local file system
BravoDayOneAndTwo <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/BravoDay1And2.xls"))
BravoDayThreeAndFour <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/BravoDay3And4.xls"))
BRAVOTotal <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/BRAVOTotal.xls"))
#Diag <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/Diag.xls"))
HRMImportMain <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/HRMImportMain.xls"))
HRMImportSwallows <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/HRMImportSwallows.xls"))
Imp_Symp <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/Imp_Symp.xls"))
ImpedanceTwo <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/Impedance2.xls"))
Procs <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/ProcByHospitalNumbers.xls"))
Procs<-Procs%>%dplyr::rename(Grade='Oesophagitis Grade')
PSPW<-readxl::read_xlsx((here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/Seb_PSPW_list (003).xlsx")))
PSPW<-PSPW%>%dplyr::rename(HospitalNumber=HospitalNum)
MNBI<-read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/MNBI.xlsx"))
MNBI2<-read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/MNBI2.xlsx"))
MNBI<-rbind(MNBI,MNBI2)
MNBI<-merge(MNBI,PSPW,by="HospitalNumber",all.x=TRUE)
#Make compatibile with Hospital Number column below:
MNBI<-MNBI%>%dplyr::rename(HospNum_Id=HospitalNumber)
options(warn = oldw)
##### Clean ####################################################################################################################
## @knitr dataClean
#Data cleaning
#Use after PhysiData_Acq
#Clean HRM
#Clean up the HRM swallows:
HRMImportSwallows<-HRMImportSwallows[!is.na(HRMImportSwallows$panesophagealpressurizationMapSwallowsNum8),]
##### IntraTestMergeAndClean ########################################################################################
## @knitr dataIntraTestMerge
# Get the whole impedance dataset
#You will need to re-clean the merged dataImpSympImpedance as cleaning function needed to be fixed
AllImpedance<-merge(ImpedanceTwo,Imp_Symp,by="Imp_Id", all = TRUE)
ImpedanceThree<-dataImpClean(AllImpedance)
ImpAll<-dataImpSymptoms(ImpedanceThree)
ImpAll<-GORD_AcidImp(ImpAll)
#Diag<-dataDiagClean(Diag)
#Merge the HRM results together (swallows and Main)
AllHRM<-merge(HRMImportMain,HRMImportSwallows,by="HRM_Id",all=TRUE)
#Get rid of the swallows for now as using the whole HRM dataset makes things slow:
AllHRM<-AllHRM[,!grepl("Num\\d+",colnames(AllHRM))]
#Roll up the repeats in to one:
HRMImportMainTwo<-AllHRM %>%
dplyr::group_by(HospNum_Id,DistalLESfromnarescm)%>%
dplyr::summarise_all(.funs = function(x) paste(unique(c(dplyr::lag(x), x)), collapse = ":"))
#Clean Up the main HRM
HRMImportMainTwo<-HRMCleanUp1(HRMImportMainTwo)
#Merge the BRAVO results together:
AllBravo<-merge(BravoDayOneAndTwo,BravoDayThreeAndFour,by="BravoID",all=TRUE)
AllBravo<-merge(AllBravo,BRAVOTotal,by="BravoID",all=TRUE)
AllBravo<-dataBRAVOClean(AllBravo)
AllBravo<-dataBRAVODayLabeller(AllBravo,"HospNum_Id","VisitDate")
AllBravo<-dataBRAVOSymptoms(AllBravo)
#AllBravo<-GORD_AcidBRAVO(AllBravo)
AllBravo<-GORD_BravoWDAAndAverage(AllBravo,4.3)
#Get the BRAVO procedures
Procs<-data.frame(Procs,stringsAsFactors = FALSE)
Procs$DATEOFPROCEDURE<-as.Date(Procs$DATEOFPROCEDURE)
names(Procs)<-c("HospNum_Id","VisitDate.x","Findings","Oesophagitis","Grade")
Procs<-data.frame(Procs,stringsAsFactors = FALSE)
#MNBI<-MNBI%>%rename(HospNum_Id=HospitalNumber)
MNBI<-MNBI%>%dplyr::select(HospNum_Id,MethodOne_average,pspw_index,comments)%>%dplyr::rename(M1_av=MethodOne_average)
#Only select the MNBI final outputs from the two methods:
#MNBI$M2_noct<-as.numeric(MNBI$M2_noct)
MNBI$M1_av<-as.numeric(MNBI$M1_av)
##### CrossTestMerge ########################################################################################
## @knitr dataCrossTestMerge
#Merge all HRM with AllBravo to get the full dataset
#This merges the tests so that the nearest hrm to the BRAVO is chosen to avoid replication
#Note that a lot of Bravo's do not have HRM as may not have been requested by outside referrers
#Impedance Merges:
ImpAndhrm <- HRMImportMainTwo %>% inner_join(ImpAll, by ="HospNum_Id") %>%
mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
arrange(HospNum_Id, Date_ABS_Diff) %>%
group_by(HospNum_Id) %>%
slice(1)
ImpAndBravo <- AllBravo %>% inner_join(ImpAll, by ="HospNum_Id") %>%
mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
arrange(HospNum_Id, Date_ABS_Diff) %>%
group_by(HospNum_Id) %>%
slice(1)
#ImpAnddiag <- Diag %>% inner_join(ImpAll, by ="HospNum_Id") %>%
# mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
# arrange(HospNum_Id, Date_ABS_Diff) %>%
# group_by(HospNum_Id) %>%
# slice(1)
#BRAVO Merges:
BravoAnd_hrm <- HRMImportMainTwo %>% inner_join(AllBravo, by ="HospNum_Id") %>%
mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
arrange(HospNum_Id, Date_ABS_Diff) %>%
group_by(HospNum_Id) %>%
slice(1)
#BravoAnd_diag <- Diag %>% inner_join(AllBravo, by ="HospNum_Id") %>%
# mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
# arrange(HospNum_Id, Date_ABS_Diff) %>%
# group_by(HospNum_Id) %>%
# slice(1)
#Diag Merges:
#HRMAnd_diag <- HRMImportMainTwo %>% inner_join(Diag, by ="HospNum_Id") %>%
# mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
# arrange(HospNum_Id, Date_ABS_Diff) %>%
# group_by(HospNum_Id) %>%
# slice(1)
#Three test merges:
ImpAndBravoWithHRM <- HRMImportMainTwo %>% inner_join(ImpAndBravo, by ="HospNum_Id") %>%
mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
arrange(HospNum_Id, Date_ABS_Diff) %>%
group_by(HospNum_Id) %>%
slice(1)
#Now filter for only those tests where there is a year or less between the BRAVO and the pH impedance (in either direction)
ImpAndBravoWithHRM$diffImpAndBravoDays<-difftime(ImpAndBravoWithHRM$VisitDate.x,ImpAndBravoWithHRM$VisitDate.y.y,tz,units = c("days"))
#These are the ones excluded mostly as too big a time difference:
#View(NoImpAndBravoWithHRM_Orig%>%select(HospNum_Id,VisitDate.x,VisitDate.y.y,diffImpAndBravoDays))
ImpAndBravoWithHRM<-ImpAndBravoWithHRM%>%filter(diffImpAndBravoDays<365,diffImpAndBravoDays>-365)
##### Extrapolate ############################################################################################################
## @knitr dataForking
#Extract the negative impedance
#Get the age of the patient at the time of doing the test:
library(eeptools)
ImpAndBravoWithHRM$ageInYears<-age_calc(na.omit(ImpAndBravoWithHRM$MainPtDataDateofBirth),ImpAndBravoWithHRM$MainPtDataDateofAdmission,precise=TRUE)/12
#Get all the Negative Impedance patients (with both positive and negative BRAVO results-#AcidRefluxBRAVOAv is the outcome variable we will use)
NegImp_FromImpWithBRavoAndHRM<-ImpAndBravoWithHRM %>% filter(AcidReflux_Imp==0)
#Also need to get rid of the non-acid reflux patients defined as >80 reflux episodes in a patient with normal AET.
NegImp_FromImpWithBRavoAndHRM<-NegImp_FromImpWithBRavoAndHRM%>% filter((MainRflxEpisodeTotalAcid+MainRflxEpisodeTotalNonacid)<80)
#Get rid of all the BRAVO columns as we don't need these as predictive variables
NegImp_FromImpWithBRavoAndHRM<-NegImp_FromImpWithBRavoAndHRM[ , !grepl( "^t[Dd]ay" , names( NegImp_FromImpWithBRavoAndHRM ) ) ]
NegImp_FromImpWithBRavoAndHRM<-NegImp_FromImpWithBRavoAndHRM[ , !grepl( "[Ss]wallo" , names( NegImp_FromImpWithBRavoAndHRM ) ) ]
#Add the BRAVO procedure information:
NegImp_FromImpWithBRavoAndHRM<-merge(NegImp_FromImpWithBRavoAndHRM, Procs,by=c("HospNum_Id","VisitDate.x"),all.x=TRUE)
NegImp_FromImpWithBRavoAndHRM$Oesophagitis<-ifelse(grepl("Y",NegImp_FromImpWithBRavoAndHRM$Oesophagitis),"Y","N")
#Impute the gender
NegImp_FromImpWithBRavoAndHRM$Gender<-ifelse(NegImp_FromImpWithBRavoAndHRM$Gender=="null",sample(c("Male","Female"), 1),NegImp_FromImpWithBRavoAndHRM$Gender)
##### BRAVO description #############################################
## @knitr BRAVOdescription
library(ggbeeswarm)
library(ggplot2)
library(gridExtra)
#Need to replicate Sweis paper and show 1. SAP worst day =63.2% 2. SI Worst day =61% 3. pH worst day <4 (>4.2%) =47% and most in the first 24 hours 4. 45% GERD based on worst day SAP or pH <4
#Positive GORD by pH<4 in 4.3% Worst day overall analysis-***
#pHWDA<-(nrow(NegImp_FromImpWithBRavoAndHRM[NegImp_FromImpWithBRavoAndHRM$worstt>=7.1,])/nrow(NegImp_FromImpWithBRavoAndHRM))*100
#Positive GORD by SAP Worst day overall analysis-
SAPWDA<-(nrow(NegImp_FromImpWithBRavoAndHRM %>%select(contains("SAP")) %>%filter_all(any_vars(. >94.9)))/nrow(NegImp_FromImpWithBRavoAndHRM))*100
#Positive GORD by SI Worst day overall analysis-
#SIWDA<-(nrow(NegImp_FromImpWithBRavoAndHRM %>%select(contains("SI")) %>%filter_all(any_vars(. >50)))/nrow(NegImp_FromImpWithBRavoAndHRM))*100
#NegImp_FromImpWithBRavoAndHRM<-merge(NegImp_FromImpWithBRavoAndHRM,MNBI,by="HospNum_Id",all.x=TRUE)
NegImp_FromImpWithBRavoAndHRM<-NegImp_FromImpWithBRavoAndHRM %>% left_join(MNBI, by = "HospNum_Id")
NegImp_FromImpWithBRavoAndHRM<-as_tibble(NegImp_FromImpWithBRavoAndHRM)
#Get rid of the patients Ismail identified as being poor quality:
NegImp_FromImpWithBRavoAndHRM<-NegImp_FromImpWithBRavoAndHRM[is.na(NegImp_FromImpWithBRavoAndHRM$comments),]
#Some some extra diagnoses for the paper
NegImp_FromImpWithBRavoAndHRM$Hypersensitive_vs_FD<-ifelse(NegImp_FromImpWithBRavoAndHRM$SAPTotalHeartburn>94.9&
!is.na(NegImp_FromImpWithBRavoAndHRM$SAPTotalHeartburn),"HypersensitiveOesophagus",
ifelse(NegImp_FromImpWithBRavoAndHRM$SAPTotalHeartburn<94.9&
!is.na(NegImp_FromImpWithBRavoAndHRM$SAPTotalHeartburn),"Functional Heartburn","NotHypersens"))
# DEFINITION OF POSITIVE WPM FOR GORD HERE
#How many days positive of the 4? (ie how many days with pH>5.3 on any one day)
#Using the average day analysis definition:
NegImp_FromImpWithBRavoAndHRMOnlyGORD<-NegImp_FromImpWithBRavoAndHRM[NegImp_FromImpWithBRavoAndHRM$AcidRefluxBRAVOAv==1,]
#Average day analysis definition:
#NegImp_FromImpWithBRavoAndHRMOnlyGORD<-NegImp_FromImpWithBRavoAndHRM[NegImp_FromImpWithBRavoAndHRM$average>5.2,]
#Need to divide the number of positive results by the number for each day that actually have a result
#(from ReflDay1FractionTimepHLessThan4Total",
#"ReflDay2FractionTimepHLessThan4Total","ReflDay1_2FractionTimepHLessThan4Total","ReflDay2_2FractionTimepHLessThan4Total"
#"ReflDay3_2FractionTimepHLessThan4Total" and use table(NegImp_FromImpWithBRavoAndHRM$ReflDay2_2FractionTimepHLessThan4Total,exclude = NULL))
#Get table with the actual day to day figures only for the positive patients:
DayToDayVar<-NegImp_FromImpWithBRavoAndHRMOnlyGORD%>%select(ReflDay1FractionTimepHLessThan4Total,ReflDay2FractionTimepHLessThan4Total,ReflDay1_2FractionTimepHLessThan4Total,ReflDay2_2FractionTimepHLessThan4Total,ReflDay3_2FractionTimepHLessThan4Total,ReflDay4_2FractionTimepHLessThan4Total,worstDaypH,averageAll)
#--------------------------------
library(matrixStats)
#Get rid of outliers:
DayToDayVar$row_std = rowSds(as.matrix(DayToDayVar[,c(1,2,3,4,5,6)]),na.rm=TRUE)
DayToDayVar$CV = 100*(DayToDayVar$row_std /DayToDayVar$averageAll)
#DayToDayVar<-DayToDayVar%>%filter(averageAll<20)%>%select(-averageAll)
#DayToDayVar<-DayToDayVar%>%filter(ReflDay2_2FractionTimepHLessThan4Total<20|NA)
DayToDayVar<-data.frame(DayToDayVar)
names(DayToDayVar)<-c("Day1","Day2","Day3","Day4","Day5","Day6","worstDay","averageAll","stdev","CV")
#For a quick ANOVA to see if there is any variability in the days with WOM:
data_long <- tidyr::gather(DayToDayVar, condition, measurement, Day1:Day6, factor_key=TRUE)
AOV<-aov(measurement~condition,data=data_long)
summary(AOV)
#See if can get some groupings:
#Graphs where there is a drop from day 1 to day 2:
DayToDayVar_earlydrop<-DayToDayVar%>%filter((Day2-Day1)<0)%>%select(-worstDay,-averageAll,-stdev,-CV)
DayToDayVar_earlyrise<-DayToDayVar%>%filter((Day2-Day1)>0)%>%select(-worstDay,-averageAll,-stdev,-CV)
DayToDayVar_day1_3drop<-DayToDayVar%>%filter((Day3-Day1)<0)%>%select(-worstDay,-averageAll,-stdev,-CV)
DayToDayVar_day1_3rise<-DayToDayVar%>%filter((Day3-Day1)>0)%>%select(-worstDay,-averageAll,-stdev,-CV)
DayToDayVar_day1_3rise<-DayToDayVar%>%filter((Day3-Day1)>0)%>%select(-worstDay,-averageAll,-stdev,-CV)
#Get the worst days
DayToDayVar_worst_is_Day1<-DayToDayVar%>%filter(worstDay==1)%>%select(-worstDay,-averageAll,-stdev)
DayToDayVar_worst_is_Day1_CV<-mean(DayToDayVar_worst_is_Day1$CV)
DayToDayVar_worst_is_Day2<-DayToDayVar%>%filter(worstDay==2)%>%select(-worstDay,-averageAll,-stdev)
DayToDayVar_worst_is_Day2_CV<-mean(DayToDayVar_worst_is_Day2$CV)
DayToDayVar_worst_is_Day3<-DayToDayVar%>%filter(worstDay==3)%>%select(-worstDay,-averageAll,-stdev)
DayToDayVar_worst_is_Day3_CV<-mean(DayToDayVar_worst_is_Day3$CV)
DayToDayVar_worst_is_Day4<-DayToDayVar%>%filter(worstDay==4)%>%select(-worstDay,-averageAll,-stdev)
DayToDayVar_worst_is_Day4_CV<-mean(DayToDayVar_worst_is_Day4$CV)
#DayToDayVar %>% mutate(mak=max(Day1,Day2,Day3,Day4,Day5,Day6))
library(reshape)
df_WorstDayTBB<-data.frame(x = seq_along(DayToDayVar[,1]),DayToDayVar)
df_WorstDayTBB <- melt(df_WorstDayTBB, id.vars = "x")
mydf<-df_WorstDayTBB%>%filter(grepl("worstDay",variable))
df_WorstDayTBB<-df_WorstDayTBB%>%left_join(mydf, by = "x")
df_WorstDayTBB<-df_WorstDayTBB%>%filter(!grepl("worst|averageAll|stdev|CV",variable.x))
df_WorstDayTBB$variable.x<-as.numeric(gsub("Day","",df_WorstDayTBB$variable.x))
library(ggplot2)
#Get rid of outliers and >day 4
df_WorstDayTBB<-df_WorstDayTBB%>%filter(df_WorstDayTBB$value.x<20)
df_WorstDayTBB<-df_WorstDayTBB%>%filter(df_WorstDayTBB$value.y<5)
WorstDaysPlot <- ggplot(df_WorstDayTBB, aes(x = variable.x, y = value.x))+
geom_line(aes(group = x, color = x))+
facet_grid(cols = vars(value.y))+
geom_smooth(method = "lm", formula = y ~ x, size = 1,aes(group=variable.y),color="red")+ xlab("Day") + ylab("AET (%)")
#Get the comparison of the lines:
WorstDaysPlot<-WorstDaysPlot+
theme_Publication()+
theme(legend.position='none')+
theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
theme(axis.text.x = element_text(angle = 0,vjust=1,hjust = 1,size = 8))+
theme(axis.title.y = element_text(size = 8))+
theme(axis.title.x = element_text(size = 12))+
theme(strip.background = element_blank(), strip.placement = "outside",legend.title = element_blank())
#To Do- get the average of the line:
myworstDayTable<-data.frame(table(NegImp_FromImpWithBRavoAndHRMOnlyGORD$worstDaypH))
Day1TotNumPts<-myworstDayTable$Freq[1]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay1FractionTimepHLessThan4Total),])*100
Day2TotNumPts<-myworstDayTable$Freq[2]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay2FractionTimepHLessThan4Total),])*100
Day3TotNumPts<-myworstDayTable$Freq[3]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay1_2FractionTimepHLessThan4Total),])*100
Day4TotNumPts<-myworstDayTable$Freq[4]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay2_2FractionTimepHLessThan4Total),])*100
Day5TotNumPts<-myworstDayTable$Freq[5]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay3_2FractionTimepHLessThan4Total),])*100
myworstDayTable$Percentage<-c(Day1TotNumPts,Day2TotNumPts,Day3TotNumPts,Day4TotNumPts,Day5TotNumPts)
names(myworstDayTable)<-c("Day","Freq","Percentage")
a2<-ggplot(myworstDayTable,aes(x=Day,y=Percentage))+
geom_histogram(stat="identity")+
scale_colour_Publication()+
theme_Publication()+
xlab("Worst Day AET pH<4 ")+
ylab("% of studies")+
theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
theme(axis.text.x = element_text(angle = 0,hjust = 1,size = 10))+
theme(axis.title.y = element_text(size = 8))+
theme(axis.title.x = element_text(size = 12))+
labs(fill = "GERD")+
scale_fill_discrete(name = "GERD", labels = c("No", "Yes"))
#This graph looks at the worst pH for WPM-GORD positive and WPM-GORD negative patients
#Cant seem to get rid of the outliers so have set the average limit to 25.
a3<-ggplot(NegImp_FromImpWithBRavoAndHRM%>%filter(worstt<25), aes(x = as.factor(AcidRefluxBRAVOAv),y=worstt,fill=as.factor(AcidRefluxBRAVOAv))) +
geom_violin(opacity="50%")+
geom_boxplot(outlier.shape = NA,width=0.4)+geom_beeswarm(size=0.3,priority='density',cex=1.5)+
scale_x_discrete(labels=c("Negative","Positive"))+
scale_colour_Publication()+
theme_Publication()+
theme(legend.position='none')+
xlab("GERD")+
ylab("Worst day AET")+
theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
theme(axis.text.x = element_text(angle = 0,hjust = 1,size = 10))+
theme(axis.title.y = element_text(size = 8))+
theme(axis.title.x = element_text(size = 12))+
labs(fill = "GERD") +
scale_fill_grey(name = "GERD", labels = c("No", "Yes"),start = 0.5, end = 0.8)
myNumDayspositiveTable<-data.frame(table(NegImp_FromImpWithBRavoAndHRMOnlyGORD$NumDaysBravoPositive))
Day1TotNumPosPts<-myNumDayspositiveTable$Freq[1]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay1FractionTimepHLessThan4Total),])*100
Day2TotNumPosPts<-myNumDayspositiveTable$Freq[2]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay1FractionTimepHLessThan4Total),])*100
Day3TotNumPosPts<-myNumDayspositiveTable$Freq[3]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay2FractionTimepHLessThan4Total),])*100
Day4TotNumPosPts<-myNumDayspositiveTable$Freq[4]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay1_2FractionTimepHLessThan4Total),])*100
Day5TotNumPosPts<-myNumDayspositiveTable$Freq[5]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay2_2FractionTimepHLessThan4Total),])*100
Day6TotNumPosPts<-myNumDayspositiveTable$Freq[6]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay3_2FractionTimepHLessThan4Total),])*100
Day7TotNumPosPts<-myNumDayspositiveTable$Freq[7]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay4_2FractionTimepHLessThan4Total),])*100
myNumDayspositiveTable$Percentage<-c(Day1TotNumPosPts,Day2TotNumPosPts,Day3TotNumPosPts,Day4TotNumPosPts,Day5TotNumPosPts,Day6TotNumPosPts)
names(myNumDayspositiveTable)<-c("Day","Freq","Percentage")
a4<-ggplot(myNumDayspositiveTable[2:nrow(myNumDayspositiveTable),],aes(x=Day,y=Percentage))+
geom_histogram(stat="identity")+
scale_colour_Publication()+
theme_Publication()+
xlab("Number of days of GERD")+
ylab("% of studies")+
theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
theme(axis.text.x = element_text(angle = 0,hjust = 1,size = 10))+
theme(axis.title.y = element_text(size = 8))+
theme(axis.title.x = element_text(size = 12))+
labs(fill = "GERD") + scale_fill_discrete(name = "GORD", labels = c("No", "Yes"))
#+
# theme(text=element_text(size=10, family="Arial"))+
# ggsave("Figure1a.jpeg", width = 20, height = 20, dpi = 300, units="cm")
#Correlating the worst Day vs the number of positive days with BRAVO:
NumDays_vs_WorstDay<-merge(myNumDayspositiveTable[2:nrow(myNumDayspositiveTable),],myworstDayTable,by="Day")
NumDays_vs_WorstDayCorr<-cor.test(NumDays_vs_WorstDay$Percentage.x,NumDays_vs_WorstDay$Percentage.y)
############## Get the symptoms of the patients: ###########
interim<-strsplit(NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO,",")
interim<-lapply(interim,function(x)unique(x,","))
interim<-lapply(interim,function(x) sort(x))
new<-unlist(lapply(interim,function(x) paste0(x,collapse=",")))
NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO<-gsub("Epigastric","StomachPain",new)
BelchSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Belch",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
ChestPainSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("ChestPain",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
CoughSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Cough",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
HeartburnSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Heartburn",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
RegurgitationSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Regurgitation",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
ThroatSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Throat",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
VomitingSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Vomiting",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
NauseaSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Nausea",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
StomachPainSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("StomachPain",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
Symps<-data.frame(BelchSx,ChestPainSx,CoughSx,HeartburnSx,RegurgitationSx,ThroatSx,VomitingSx,NauseaSx,StomachPainSx)
Sympdf<-gather(Symps, "Symptom", "Frequency", 1:ncol(Symps))
Sympdf$Percentage<-((Sympdf$Frequency/nrow(NegImp_FromImpWithBRavoAndHRM))*100)
Sympdf$Symptom<-gsub("Sx","",Sympdf$Symptom)
a5<-ggplot(Sympdf,aes(x=Symptom,y=Frequency)) +
geom_histogram(stat="identity")+
scale_colour_Publication()+
theme_Publication()+
xlab("Symptom")+
ylab("% of studies")+
theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
theme(axis.text.x = element_text(angle = 30,vjust=1,hjust = 1,size = 8))+
theme(axis.title.y = element_text(size = 8))+
theme(axis.title.x = element_text(size = 12))
#+
#theme(text=element_text(size=10, family="Arial"))+
#ggsave("Figure1d.jpeg", width = 20, height = 20, dpi = 300, units="cm")
df_WorstDayTBB2<-df_WorstDayTBB%>%dplyr::rename(DayPlot=variable.x,AET=value.x,WorstDayGroup=value.y)%>%select(DayPlot,AET,WorstDayGroup)
my_means<-df_WorstDayTBB2%>%group_by(WorstDayGroup,DayPlot)%>%dplyr::summarize(Mean = mean(AET, na.rm=FALSE))
my_means<-data.frame(my_means)
my_means$WorstDayGroup<-as.character(my_means$WorstDayGroup)
compLines<-ggplot(data=my_means,aes(x=DayPlot,y=Mean,group=WorstDayGroup))+
geom_smooth(method = "lm", formula = y ~ x, size = 1,aes(group=WorstDayGroup),color="red")+ xlab("Day") + ylab("AET (%)")+
theme_Publication()+
theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
theme(axis.text.x = element_text(angle = 0,vjust=1,hjust = 1,size = 8))+
theme(axis.title.y = element_text(size = 8))+
theme(axis.title.x = element_text(size = 12))+
xlim(1,5)
library(patchwork)
patch<-(a4+a3)/(a2+a5)/(WorstDaysPlot+compLines)
patchwork<-patch & theme(legend.position='none')
#patchwork
patchwork+plot_annotation(tag_levels = c('A', '1'),
tag_suffix = '')+
theme(plot.tag.position = c(0, 0),
plot.tag = element_text(size = 8, hjust = 0, vjust = 0))
#+
# ggsave(here::here("inst","Projects","BRAVOStudies","NegImpPredictorsOfAllPosBRAVO","reports","Figure1_Complete.jpeg"), width = 20, height = 20, dpi = 300, units="cm")
##### Select columns with minimal data for data quality ############################################
## @knitr missingClean
#attr(NegImp_FromImpWithBRavoAndHRM$M2_noct, "label") <- "MethodTwo Nocturnal"
#Remove columns that are not meaningful:
#Impedance symptoms:
#Get all the MainAcidExposure/MainSx columns as your main impedance dataset. Note this also gets rid of composite score(=DeMeester) as it is derived from MainAcidExp so likely to correlate too much for regression
#NegImp_FromImpWithBRavoAndHRM$WorstD_ayAnalysisGORDPositive
NegImp_FromImpWithBRavoAndHRMMinSet<-NegImp_FromImpWithBRavoAndHRM[,grepl("ageInYears|MainAcidEx|MainSx|SxMain|HospNum_Id|DistalLESfromnarescm|Gender|DOBAge|Height|LESlengthcm|PIPfromnarescm|Hiatalhernia|BasalrespiratoryminmmHg|ResidualmeanmmHg|BasalrespiratorymeanmmHg|DistalcontractileintegralmeanmmHgcms|Contractilefrontvelocitycms|IntraboluspressureATLESRmmHg|Distallatency|failedChicagoClassification|panesophagealpressurization|largebreaks|Simultaneous|prematurecontraction|rapidcontraction|smallbreaks|VisitDate|Age|AcidReflux|Upright|Recumbent|SAP|M2_noct|M1_av|pspw|WorstD_|AllSymps_BRAVOgrouped|Oesophagitis|Grade|LESmidpointfromnarescm|MainRflxEpisodeTotalNonacid",names(NegImp_FromImpWithBRavoAndHRM))]
#attr(NegImp_FromImpWithBRavoAndHRMMinSet$M2_noct, "label") <- "MethodTwo Nocturnal"
#Also get rid of all columns containing unrelated symptoms and Nonacid and only look at the acid related things (so not "AllReflux")
NegImp_FromImpWithBRavoAndHRMMinSet<-NegImp_FromImpWithBRavoAndHRMMinSet[,!grepl("Unrelated|AllReflux",names(NegImp_FromImpWithBRavoAndHRMMinSet))]
#Remove RSSI as dont know what it is and SxCorr as it is just a number rather than a symptom score (RSI and SAP derived from it anyway)
NegImp_FromImpWithBRavoAndHRMMinSet<-NegImp_FromImpWithBRavoAndHRMMinSet[,!grepl("^SITotal|^SIDay|RSSI|SxCorr|ChannelTime|Duration|ClearanceTime|Composite|PP",names(NegImp_FromImpWithBRavoAndHRMMinSet))]
#Combine columns so they are more meaningful. Use the SI and SAP as potential parameters but only if significantly associated
#Combine symptoms so have only three categories- LPR (Throat/cough/Globus)/Oesophageal(Chest pain/regurgitation/belch/vomiting/heartburn)/Abdominal (Stomachpain)
#Also just code the symptoms as being present or not so if any of the members of the category are SAP >95% or SI >50% then it is a positive finding for that category
#Now get rid of BRAVO columns you dont need:
NegImp_FromImpWithBRavoAndHRMMinSet<-NegImp_FromImpWithBRavoAndHRMMinSet[,!grepl("Meds|UESMeanResidLocationcenterfrnarescm|ResidMeanbasalpressuremmHg|ResidMeanresidualpressuremmHg|MainProc|Catheter|NumDaysBravoPositive|Migration|HospNum_Id|HRM_Id|Physician|ReferringPhysician|Operator|BravoID|VisitDate|Stats|Procedure|FileCreationDate|TimeToNext|TimeSinceLast|BravoID|Imp_Id|^id$|PatientScore|MainPtData|GastricChannel|MainProcProcedureStart|MainProcProcedureDuration",names(NegImp_FromImpWithBRavoAndHRMMinSet))]
#Tells you which columns have the most missing data so they can be excluded
library(DataExplorer)
exclude_missing<-function(df){
missing<-profile_missing(df)
missing<-data.frame(missing,stringsAsFactors = FALSE)
#Only <10% for each row missing data is accepted
df_m<-missing[!grepl("Remove|Bad",missing$group),]
#Pick the columns with good data in them from this set
df<-df[,match(as.character(df_m$feature),names(df))]
df<-data.frame(df,stringsAsFactors = FALSE)
#3. Get some basic descriptive stats:
#rr<-describe(df)
#Select only the numeric variables
#df<-dplyr::select_if(df, is.numeric)
return(df)
}
#Only use columns with good quality data:
#NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal<-exclude_missing(NegImp_FromImpWithBRavoAndHRMMinSet)
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal<-NegImp_FromImpWithBRavoAndHRMMinSet
############### Consort diagram ###############
library(DiagrammeR)
library(DiagrammeRsvg) #Needed if you want to export the image
#library(rsvg) #Needed if you want to export the image
#Set the values which will go into each label.
a1 <- paste0("a1: Number of impedance studies =(",nrow(ImpAndhrm), ") \n Patients =",length(unique(ImpAndhrm$HospNum_Id)))
b1 <- ''
c1 <- ''
d1 <- paste0("d1: Number of Impedance studies \n negative for GORD=",nrow(ImpAndhrm[ImpAndhrm$AcidReflux_Imp==0,]), ")","\n Patients:",length(unique(ImpAndhrm[ImpAndhrm$AcidReflux_Imp==0,]$HospNum_Id)))
e1 <- paste0("e1: Number patients \n with negative impedance and subsequent BRAVO: " ,nrow(NegImp_FromImpWithBRavoAndHRM))
a2 <- ''
b2 <- ''
c2 <- ''
d2 <- ''
e2 <- ''
a3 <- ''
b3 <- ''
c3 <- ''
d3 <- ''
e3 <- ''
#Create a node dataframe
ndf <- create_node_df(
n = 15,
label = c(a1, b1, c1, d1, e1, #Column 1
a2, b2, c2, d2, e2, #Column 2
a3, b3, c3, d3, e3), #Column 2
style = c('solid', 'invis', 'invis', 'solid', 'solid', #Column 1
'invis', 'invis', 'invis', 'invis', 'invis', #Column 2
'invis', 'invis', 'invis', 'invis', 'invis'), #Column 3
shape = c('box', 'box', 'point', 'box', 'box', #Column 1
'box', 'point', 'point', 'box', 'point', #Column 2
'point', 'point', 'point', 'point', 'point'), #Column 3
width = c(3, 0.001, 0.001, 3, 3, #Column 1
0.001, 0.001, 0.001, 0.001, 0.001, #Column 2
0.001, 0.001, 0.001, 0.001, 0.001), #Column 3
height = c(1, 0.001, 0.001, 1, 1, #Column 1
0.001, 0.001, 0.001, 0.001, 0.001, #Column 2
0.001, 0.001, 0.001, 0.001, 0.001), #Column 3
fontsize = c(rep(6, 1)),
fontname = c(rep('Helvetica', 10)),
penwidth = 1.5,
fixedsize = 'true')
#Create an edge dataframe
edf <- create_edge_df(
from = c(1,4#Horizontals
),
to = c(4,5 #Horizontals
),
arrowhead = c('normal', 'normal', 'none', 'none','none','none', #Column 1
'none', 'none', 'normal', 'none', #Column 2
'none', 'none' #Horizontals
),
color = c('black', '#black', '#00000000', '#00000000', '#00000000', '#00000000', #Column 1
'#00000000', '#00000000', '#00000000', '#00000000', #Column 2
'#00000000', '#00000000' #Horizontals
),
constraint = c(rep('true', 8), #Columns
rep('false', 2) #Horizontals
)
)
g <- create_graph(ndf,
edf,
attr_theme = NULL)
render_graph(g)
###################### Day to Day ANOVA ########################################
## @knitr Day2DayANOVA
library(ez)
#res.aov <- anova_test(data = my_means, dv = score, wid = id, within = time)
#get_anova_table(res.aov)
my_means=my_means[my_means$DayPlot!=6,]
repeat1<-ezANOVA(data=my_means ,dv=.(Mean),wid=.(WorstDayGroup),within=.(DayPlot),type=3)
#library(lsmeans)
#m.interaction <- lm(Mean~WorstDayGroup*DayPlot,data=my_means)
#myDayToDayAnova<-anova(m.interaction)
#m.interaction$coefficients
#m.lst <- lstrends(m.interaction, "WorstDayGroup", var="DayPlot")
#comparisonRegressionLines<-pairs(m.lst)
#myDayToDayAnovadf<-data.frame(myDayToDayAnova)
#dayToDayAnovaTable<-flextable(myDayToDayAnovadf %>% tibble::rownames_to_column("column name"))%>%
# set_caption(caption = "Table 1: Example") %>%
#font(fontname = "Calibri (Body)", part = "all") %>%
#fontsize(size = 10, part = "body") %>%
# add footer if you want
# add_footer_row(values = "* p < 0.05. ** p < 0.01. *** p < 0.001.",
# colwidths = 4) %>%
# theme_booktabs() %>% # default theme
# autofit()
#dayToDayAnovaTable
###################### Univariate Anaylsis OR ########################################
## @knitr HRM_UnivariateAnalysisOR
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal$AcidRefluxBRAVOAv<-factor(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal$AcidRefluxBRAVOAv,
levels=c(0,1),
labels=c("Negative", # Reference
"Positive"))
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal$WorstD_ayAnalysisGORDPositive<-factor(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal$WorstD_ayAnalysisGORDPositive,
levels=c(0,1),
labels=c("Negative", # Reference
"Positive"))
#Rearrange columns so can do the univariate analysis more efficiently:
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal %>% select(AcidRefluxBRAVOAv,1:12, everything())
#Create labels
#MNBI$M2_noct<-as.numeric(MNBI$M2_noct)
#MNBI$M1_av<-as.numeric(MNBI$M1_av)
#For the table get rid of variables you dont want to see
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal %>% dplyr::select(
#-LOS_relax,
-DOBAge,
#-LowerOesoph,
# -AllSymps_BRAVOcompartment,
# -EsophageallengthLESUEScenterscm,
#-ProximalLESfromnarescm,
-DistalLESfromnarescm,
-LESlengthcm,
-Height,
#-MainRflxEpisodeTotalAcid,
-Simultaneous,
-prematurecontraction,
-rapidcontraction,
-PIPfromnarescm,
dplyr::contains("Oesophagitis"),
# dplyr::contains("M2_noct"),
dplyr::contains("M1_av"),
dplyr::contains("Method")
-dplyr::contains("TotalAcid"),
-dplyr::contains("SAPDay"),
-dplyr::contains("SAP",ignore.case = FALSE),
-dplyr::contains("SI",ignore.case = FALSE),
-dplyr::contains("SAPTotal"),
-dplyr::contains("SxMain"),
-dplyr::contains("MainSx"),
-IntraabdominalLESlengthcm,
-Hiatalhernia,
# -Findings,
-BasalrespiratorymeanmmHg,
-IntraboluspressureATLESRmmHg,
# -DistalcontractileintegralhighestmmHgcms,
# -AcidRefluxBRAVOTotalOnly,
#-AllSymps_Impgrouped,
#-AllSymps_BRAVO,
#-AllImpSymptom,
-AcidReflux_Imp,
-Age,
matches("SAPTotalHeartburn|SAPTotalChestPain|SAPTotalRegurgitation|SAPTotalBelch|SAPTotalCough|SAPTotalThroat|SAPTotalEpigastric|SAPTotalVomiting|SAPTotalNausea"),
-matches("SAPTotalOther|SAPTotalPostPr|SAPTotalMeal"),
-SAPOesophageal,
-SAPOther,
-SAPLPR,
#-SIOesophageal,
#-SIOther,
#-SILPR,
-contains("SAP"),
-contains(".y"),
-matches("worstt|worstDaypH|Day2|ReflD|DeMeester|Recumbent|Prox|Upright|Day1Pos|Day2Pos|Day3Pos|Day4Pos|Day5Pos|Day6Pos|Trajec")
#-average,
#-averageAll
)
#Do some imputation for missing data:
#library(mice)
#imputed<-mice(data = NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2, m = 5, method = "pmm", maxit = 50, seed = 500)
#NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2<-complete(imputed)
#attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$M2_noct, "label") <- "MNBI Nocturnal"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$M1_av, "label") <- "MNBI"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$ageInYears, "label") <- "Age (years)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$Oesophagitis, "label") <- "Oesophagitis"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$DistalcontractileintegralmeanmmHgcms, "label") <- "DCI (mean mmHg/cm/s)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$Contractilefrontvelocitycms, "label") <- "Contractile front velocity (m/s)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$Distallatency, "label") <- "Distal latency (s)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$panesophagealpressurization, "label") <- "% Panoesophageal pressurization"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$largebreaks, "label") <- "% Large Breaks"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$smallbreaks, "label") <- "% Small breaks"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MainRflxEpisodeTotalNonacid, "label") <- "Number of Non acid episodes"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MainAcidExpTotalClearanceChannelNumberofAcidEpisodes, "label") <- "Number of Acid Episodes"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MainAcidExpTotalClearanceChannelPercentTime, "label") <- "AET"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MainAcidExpTotalClearanceChannelLongestEpisode, "label") <- "Longest acid episode"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$LESmidpointfromnarescm, "label") <- "LES midpoint from nares (cm)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$BasalrespiratoryminmmHg, "label") <- "Basal resp. min. (mmHg)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$ResidualmeanmmHg, "label") <- "Residual mean (mmHg)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$failedChicagoClassification, "label") <- "% Failed peristalsis"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$AllSymps_BRAVOgrouped, "label") <- "All symptoms grouped"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$Oesophagitis, "label") <- "Endoscopic Findings"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$pspw_index, "label") <- "PSPWI"
#Symptom cleaning for analysis: Need to have as yes no answers rather than actual SAPs
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalHeartburn<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalHeartburn),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalChestPain<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalChestPain),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalRegurgitation<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalRegurgitation),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalBelch<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalBelch),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalCough<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalCough),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalThroat<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalThroat),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalEpigastric<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalEpigastric),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalVomiting<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalVomiting),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPOesophageal<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPOesophageal),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPOther<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPOther),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPLPR<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPLPR),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MethodOne_average<-as.numeric(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MethodOne_average)
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MethodTwo_WholeNocturnalPeriod<-as.numeric(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MethodTwo_WholeNocturnalPeriod)
#Get rid of rows where the impedance data is missing following the merge:
#Make sure the group to compare is the first column and then list the last column as the final variable to compare for each group:
library(compareGroups)
resOR <- compareGroups(AcidRefluxBRAVOAv ~ . , NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2, compute = TRUE)
########### Univariate predictors value #############################################
## @knitr UnivariatePredictorsValue
#Rearrange columns so age and gender are together
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2 %>% select(AcidRefluxBRAVOAv,ageInYears,1:12, everything())
#Remove some columns that arent for this table but are for the WDA analysis
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal3<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2%>%select(-contains("worst"))
library(gtsummary)
so<-tbl_summary(
data = NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal3,
by = AcidRefluxBRAVOAv,missing = "no",
type = list(all_numeric() ~ "continuous"),
# change statistics printed in table
statistic = list(all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} / {N} ({p}%)"),
digits = list("marker" ~ c("Y", "N"))
)
# add p-values, report t-test, round large pvalues to two decimal place
so<-add_p(so,test = list(vars(marker) ~ "t.test"),
pvalue_fun = function(x) style_pvalue(x, digits = 2))
# bold variable labels, italicize levels
so<- bold_labels(so)
so<-italicize_levels(so)
# bold p-values under a given threshold (default is 0.05)
so<-bold_p(so,t = 0.05)
# include percent in headers
so<-modify_header(so,stat_by = "**{level}**, N = {n} ({style_percent(p, symbol = TRUE)})")
as_gt(so)
#Choose variables automatically from the univariate analysis:
sodf<-data.frame(so$meta_data,stringsAsFactors = FALSE)
sodf<-sodf%>%select(var_label,p.value)%>%filter(p.value < 0.5e-01)
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal4<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2%>%select(-"AcidRefluxBRAVOAv")
soWDA<-tbl_summary(
data = NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal4,
by = WorstD_ayAnalysisGORDPositive,missing = "no",
type = list(all_numeric() ~ "continuous"),
# change statistics printed in table
statistic = list(all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} / {N} ({p}%)"),
digits = list("marker" ~ c("Y", "N"))
)
# add p-values, report t-test, round large pvalues to two decimal place
soWDA<-add_p(soWDA,test = list(vars(marker) ~ "t.test"),
pvalue_fun = function(x) style_pvalue(x, digits = 2))
# bold variable labels, italicize levels
soWDA<- bold_labels(soWDA)
soWDA<-italicize_levels(soWDA)
# bold p-values under a given threshold (default is 0.05)
soWDA<-bold_p(soWDA,t = 0.05)
# include percent in headers
soWDA<-modify_header(soWDA,stat_by = "**{level}**, N = {n} ({style_percent(p, symbol = TRUE)})")
as_gt(soWDA)
#Choose variables automatically from the univariate analysis:
soWDA_df<-data.frame(soWDA$meta_data,stringsAsFactors = FALSE)
soWDA_df<-soWDA_df%>%select(var_label,p.value)%>%filter(p.value < 0.5e-01)
options(gtsummary.as_gt.addl_cmds = "gt::tab_options(table.font.size = 'x-small', data_row.padding = gt::px(1))")
soWDAandAV<-tbl_merge(list(so,soWDA),tab_spanner = c("Average day analysis", "Worst day analysis"))
soWDAandAV
#Need to choose more variables here from the univariate above based on the p--value <0.05
chosen<-data.frame(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$AcidRefluxBRAVOAv, NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2[, Hmisc::label(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2) %in% sodf$var_label])
chosen<-chosen %>% dplyr::rename(AcidRefluxBRAVOAv = NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2.AcidRefluxBRAVOAv)
#Need to choose more variables here from the univariate above using average day analysis based on the p--value <0.05
chosenWDA<-data.frame(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$WorstD_ayAnalysisGORDPositive, NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2[, Hmisc::label(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2) %in% soWDA_df$var_label])
chosenWDA<-chosenWDA %>% dplyr::rename(WorstD_ayAnalysisGORDPositive = NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2.WorstD_ayAnalysisGORDPositive)
########### Univariate predictors graph #############################################
## @knitr UnivariatePredictorsGraph
library(ggplot2)
p1<-ggplot(chosen, aes(x = AcidRefluxBRAVOAv, y = chosen[,2])) +
geom_violin(outlier.shape = NA,fill='red',alpha=0.3)+geom_beeswarm(size=0.5,priority='density',cex = 2)+
geom_boxplot(width=0.1,alpha=0.4)+
scale_colour_Publication()+
theme_Publication()+
ylab(Hmisc::label(chosen[2]))+
xlab("WPM-GERD")
#+
# ggsave("Figure2a.jpeg", width = 20, height = 20, dpi = 300, units="cm")
p2<-ggplot(chosen, aes(x = AcidRefluxBRAVOAv, y = chosen[,3])) +
geom_violin(outlier.shape = NA,fill='red',alpha=0.3)+geom_beeswarm(size=0.5,priority='density',cex = 2)+
geom_boxplot(width=0.1,alpha=0.4)+
scale_colour_Publication()+
theme_Publication()+
ylab(Hmisc::label(chosen[3]))+
xlab("WPM-GERD")
#+
# ggsave("Figure2b.jpeg", width = 20, height = 20, dpi = 300, units="cm")
p3<-ggplot(chosen, aes(x = AcidRefluxBRAVOAv, y = chosen[,4])) +
geom_violin(outlier.shape = NA,fill='red',alpha=0.3)+geom_beeswarm(size=0.5,priority='density',cex = 2)+
geom_boxplot(width=0.1,alpha=0.4)+
scale_colour_Publication()+
theme_Publication()+
ylab(Hmisc::label(chosen[4]))+
xlab("WPM-GERD")
#+
# ggsave("Figure2c.jpeg", width = 20, height = 20, dpi = 300, units="cm")
#
p4<-ggplot(chosen, aes(y = AcidRefluxBRAVOAv)) +
geom_bar(aes(fill = chosen[,5]),position="fill") +
scale_colour_Publication()+
theme_Publication()+
xlab("Number of studies")+
ylab("GERD")+
labs(fill = "Oesophagitis") + scale_fill_discrete(name = "Oesophagitis", labels = c("No", "Yes"))
#theme(text=element_text(size=10, family="Arial")) +
#ggsave("Figure2c.jpeg", width = 20, height = 20, dpi = 300, units="cm")
ggarrange(p1,p2,p3,p4,
labels = c("a)","b)","c)"),
nrow = 2,
ncol = 2)+
theme(text=element_text(size=6, family="Arial"))
#ggsave("Figure2_Complete.jpeg", width = 20, height = 20, dpi = 300, units="cm")
########### Multiple Logistic Regression2 #############################################
## @knitr HRM_MultipleLogRegression2
#To get univariate and multivariate odds ratios:
library(finalfit)
chosen$AcidRefluxBRAVOAv<-as.factor(chosen$AcidRefluxBRAVOAv)
#Note for some reason I cant get Age and SAP ones in here
explanatory = names(chosen[,!grepl("AcidRefluxBRAVOAv",names(chosen))])
dependent = 'AcidRefluxBRAVOAv'
mytable<-chosen %>%
finalfit.glm(dependent, explanatory)
#convert oesophagitis to numeric
chosen$Oesophagitis<-ifelse(grepl("Y",chosen$Oesophagitis),1,0)
mod1 <- glm(chosen$AcidRefluxBRAVOAv ~ ., data = dplyr::select_if(chosen, is.numeric), family = binomial(link = "logit"))
AvReg<-tbl_regression(mod1, exponentiate = TRUE)%>%bold_p(t = 0.05)
#Choose variables automatically from the univariate analysis:
AvReg_df<-data.frame(AvReg$table_body,stringsAsFactors = FALSE)
AvReg_df<-AvReg_df%>%select(variable,p.value)%>%filter(p.value < 0.5e-01)
####### For WDA:
#convert oesophagitis to numeric
chosenWDA$Oesophagitis<-ifelse(grepl("Y",chosenWDA$Oesophagitis),1,0)
chosenWDA$WorstD_ayAnalysisGORDPositive<-as.factor(chosenWDA$WorstD_ayAnalysisGORDPositive)
#Note for some reason I cant get Age and SAP ones in here
explanatory = names(chosenWDA[,!grepl("WorstD_ayAnalysisGORDPositive",names(chosenWDA))])
dependent = 'WorstD_ayAnalysisGORDPositive'
mytableWDA<-chosenWDA %>%
finalfit.glm(dependent, explanatory)
#convert oesophagitis to numeric
#chosenWDA$Oesophagitis<-ifelse(grepl("Y",chosenWDA$Oesophagitis),1,0)
mod1WDA <- glm(chosenWDA$WorstD_ayAnalysisGORDPositive ~ ., data = dplyr::select_if(chosen, is.numeric), family = binomial(link = "logit"))
WDAReg<-tbl_regression(mod1WDA, exponentiate = TRUE)%>%bold_p(t = 0.051)
tbl_merge(list(AvReg,WDAReg),tab_spanner = c("Average day analysis", "Worst day analysis"))
########## Cut Point For pH <4 #############################################
## @knitr CutPointForAcid
chosen$AcidRefluxBRAVOAv<-as.character(chosen$AcidRefluxBRAVOAv)
#Determine optimal cutpoints for the Percent time <4 (the last column in the chosen table)
library(cutpointr)
vb<-cutpointr(x = chosen[,4], class = chosen$AcidRefluxBRAVOAv,na.rm=TRUE,method = maximize_metric, metric = sum_sens_spec)
dens1<-plot_x(vb)+geom_density(color="white", fill="grey")+
xlab(Hmisc::label(names(chosen[ncol(chosen)])))+
ggtitle(paste0(""))+
labs(subtitle = "")+
theme(text=element_text(size=10, family="Arial"))
#ggsave("Figure3a.jpeg", width = 20, height = 20, dpi = 300, units="cm")
roc1<-plot_roc(vb)+ ggtitle(paste0(""))+
theme(text=element_text(size=10, family="Arial"))
#ggsave("Figure3b.jpeg", width = 20, height = 20, dpi = 300, units="cm")
# Look at the difference between the diagnoses of reflux with worst and average day analysis:
newout<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal
newout$WorstD_ayAnalysisGORDPositive<-gsub("Negative",0,newout$WorstD_ayAnalysisGORDPositive)
newout$WorstD_ayAnalysisGORDPositive<-gsub("Positive",1,newout$WorstD_ayAnalysisGORDPositive)
newout$WorstD_ayAnalysisGORDPositive<-as.numeric(newout$WorstD_ayAnalysisGORDPositive)
newout$AcidRefluxBRAVOAv<-gsub("Negative",0,newout$AcidRefluxBRAVOAv)
newout$AcidRefluxBRAVOAv<-gsub("Positive",1,newout$AcidRefluxBRAVOAv)
newout$AcidRefluxBRAVOAv<-as.numeric(newout$AcidRefluxBRAVOAv)
newout$DiagnosticDifference<-newout$WorstD_ayAnalysisGORDPositive-newout$AcidRefluxBRAVOAv
#Just pick the ones that are different:
DiffDxPatients<-newout[newout$DiagnosticDifference!=0,]
DiffDxPatients<-DiffDxPatients%>%select(AcidRefluxBRAVOAv,DistalLESfromnarescm,Gender,DOBAge,
AllSymps_BRAVOgrouped,MainAcidExpTotalClearanceChannelPercentTime,
MainAcidExpTotalClearanceChannelNumberofAcidEpisodes,AcidReflux_Imp,
ageInYears,Oesophagitis,Grade,M1_av,pspw_index,DiagnosticDifference)
########## diagrammeR #############################################
## @knitr diagrammeR
#Other analyses:
#Day by day analysis to see if patients more likely to have intermittent symptoms especially for reflux and heartburn using the BRAVO results
#Compare BRAVO positive with BRAVO negative
#Are the hypersensitives also the ones with high AET. ie do HOvsHighAET NoHovs HighAET HOvsLowAET and NoHOvsLowAET and compare the groups (high AET is > optimal cutpoint)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.