PhysiMineR: Functions to extract, clean, manipulate and analyse upper gastrointestinal physiological parameters

library(dplyr)
library(stringr)
library(here)
library(EndoMineR)
library(readxl)
library(tidyr)
library(PhysiMineR)
library(finalfit)
library(knitr)
library(kableExtra)
library(cutpointr)
library(ggbeeswarm)
library(ggplot2)
library(gridExtra)
library(gtsummary)
library(gt)
library(matrixStats)
library(ggpubr)
library(flextable)
library(knitr)
library(here)
library(reshape)
#library(checkpoint)


#checkpoint("2020-08-14")


##### Import ####
## @knitr dataImport

oldw <- getOption("warn")

options(warn = -1)

#Get this from the data folder for PhysiMineR-  just click on the RData to load into
#the global environment

#Or if using the local file system

BravoDayOneAndTwo <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/BravoDay1And2.xls"))
BravoDayThreeAndFour <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/BravoDay3And4.xls"))
BRAVOTotal <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/BRAVOTotal.xls"))
#Diag <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/Diag.xls"))
HRMImportMain <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/HRMImportMain.xls"))
HRMImportSwallows <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/HRMImportSwallows.xls"))
Imp_Symp <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/Imp_Symp.xls"))
ImpedanceTwo <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/Impedance2.xls"))
Procs <- read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/ProcByHospitalNumbers.xls"))
Procs<-Procs%>%dplyr::rename(Grade='Oesophagitis Grade')
PSPW<-readxl::read_xlsx((here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/Seb_PSPW_list (003).xlsx")))
PSPW<-PSPW%>%dplyr::rename(HospitalNumber=HospitalNum)
MNBI<-read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/MNBI.xlsx"))
MNBI2<-read_excel(here::here("inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/data/MNBI2.xlsx"))

MNBI<-rbind(MNBI,MNBI2)

MNBI<-merge(MNBI,PSPW,by="HospitalNumber",all.x=TRUE)

#Make compatibile with Hospital Number column below:
MNBI<-MNBI%>%dplyr::rename(HospNum_Id=HospitalNumber)
options(warn = oldw)




##### Clean ####################################################################################################################
## @knitr dataClean
#Data cleaning
#Use after PhysiData_Acq
#Clean HRM
#Clean up the HRM swallows:
HRMImportSwallows<-HRMImportSwallows[!is.na(HRMImportSwallows$panesophagealpressurizationMapSwallowsNum8),]


##### IntraTestMergeAndClean ########################################################################################
## @knitr dataIntraTestMerge

# Get the whole impedance dataset
#You will need to re-clean the merged dataImpSympImpedance as cleaning function needed to be fixed
AllImpedance<-merge(ImpedanceTwo,Imp_Symp,by="Imp_Id", all = TRUE)
ImpedanceThree<-dataImpClean(AllImpedance)
ImpAll<-dataImpSymptoms(ImpedanceThree)
ImpAll<-GORD_AcidImp(ImpAll)

#Diag<-dataDiagClean(Diag)


#Merge the HRM results together (swallows and Main)
AllHRM<-merge(HRMImportMain,HRMImportSwallows,by="HRM_Id",all=TRUE)

#Get rid of the swallows for now as using the whole HRM dataset makes things slow:
AllHRM<-AllHRM[,!grepl("Num\\d+",colnames(AllHRM))]
#Roll up the repeats in to one:
HRMImportMainTwo<-AllHRM %>%
  dplyr::group_by(HospNum_Id,DistalLESfromnarescm)%>%
  dplyr::summarise_all(.funs = function(x) paste(unique(c(dplyr::lag(x), x)), collapse = ":"))


#Clean Up the main HRM
HRMImportMainTwo<-HRMCleanUp1(HRMImportMainTwo)



#Merge the BRAVO results together:
AllBravo<-merge(BravoDayOneAndTwo,BravoDayThreeAndFour,by="BravoID",all=TRUE)
AllBravo<-merge(AllBravo,BRAVOTotal,by="BravoID",all=TRUE)
AllBravo<-dataBRAVOClean(AllBravo)
AllBravo<-dataBRAVODayLabeller(AllBravo,"HospNum_Id","VisitDate")
AllBravo<-dataBRAVOSymptoms(AllBravo)
#AllBravo<-GORD_AcidBRAVO(AllBravo)
AllBravo<-GORD_BravoWDAAndAverage(AllBravo,4.3)



#Get the BRAVO procedures


Procs<-data.frame(Procs,stringsAsFactors = FALSE)
Procs$DATEOFPROCEDURE<-as.Date(Procs$DATEOFPROCEDURE)

names(Procs)<-c("HospNum_Id","VisitDate.x","Findings","Oesophagitis","Grade")
Procs<-data.frame(Procs,stringsAsFactors = FALSE)



#MNBI<-MNBI%>%rename(HospNum_Id=HospitalNumber)
MNBI<-MNBI%>%dplyr::select(HospNum_Id,MethodOne_average,pspw_index,comments)%>%dplyr::rename(M1_av=MethodOne_average)


#Only select the MNBI final outputs from the two methods:
#MNBI$M2_noct<-as.numeric(MNBI$M2_noct)
MNBI$M1_av<-as.numeric(MNBI$M1_av)



##### CrossTestMerge ########################################################################################
## @knitr dataCrossTestMerge


#Merge all HRM with AllBravo to get the full dataset
#This merges the tests so that the nearest hrm to the BRAVO is chosen to avoid replication
#Note that a lot of Bravo's do not have HRM as may not have been requested by outside referrers



#Impedance Merges:
ImpAndhrm <- HRMImportMainTwo %>% inner_join(ImpAll, by ="HospNum_Id") %>%
  mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
  arrange(HospNum_Id, Date_ABS_Diff) %>%
  group_by(HospNum_Id) %>%
  slice(1)

ImpAndBravo <- AllBravo %>% inner_join(ImpAll, by ="HospNum_Id") %>%
  mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
  arrange(HospNum_Id, Date_ABS_Diff) %>%
  group_by(HospNum_Id) %>%
  slice(1)

#ImpAnddiag <- Diag %>% inner_join(ImpAll, by ="HospNum_Id") %>%
#  mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
#  arrange(HospNum_Id, Date_ABS_Diff) %>%
#  group_by(HospNum_Id) %>%
#  slice(1)


#BRAVO Merges:
BravoAnd_hrm <- HRMImportMainTwo %>% inner_join(AllBravo, by ="HospNum_Id") %>%
  mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
  arrange(HospNum_Id, Date_ABS_Diff) %>%
  group_by(HospNum_Id) %>%
  slice(1)

#BravoAnd_diag <- Diag %>% inner_join(AllBravo, by ="HospNum_Id") %>%
#  mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
#  arrange(HospNum_Id, Date_ABS_Diff) %>%
#  group_by(HospNum_Id) %>%
#  slice(1)


#Diag Merges:
#HRMAnd_diag <- HRMImportMainTwo %>% inner_join(Diag, by ="HospNum_Id") %>%
#  mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
#  arrange(HospNum_Id, Date_ABS_Diff) %>%
#  group_by(HospNum_Id) %>%
#  slice(1)


#Three test merges:
ImpAndBravoWithHRM <- HRMImportMainTwo %>% inner_join(ImpAndBravo, by ="HospNum_Id") %>%
  mutate(Date_ABS_Diff = abs(VisitDate.x - VisitDate.y)) %>%
  arrange(HospNum_Id, Date_ABS_Diff) %>%
  group_by(HospNum_Id) %>%
  slice(1)





#Now filter for only those tests where there is a year or less between the BRAVO and the pH impedance (in either direction)
ImpAndBravoWithHRM$diffImpAndBravoDays<-difftime(ImpAndBravoWithHRM$VisitDate.x,ImpAndBravoWithHRM$VisitDate.y.y,tz,units = c("days"))

#These are the ones excluded mostly as too big a time difference:
#View(NoImpAndBravoWithHRM_Orig%>%select(HospNum_Id,VisitDate.x,VisitDate.y.y,diffImpAndBravoDays))



ImpAndBravoWithHRM<-ImpAndBravoWithHRM%>%filter(diffImpAndBravoDays<365,diffImpAndBravoDays>-365)





##### Extrapolate ############################################################################################################
## @knitr dataForking
#Extract the negative impedance

#Get the age of the patient at the time of doing the test:
library(eeptools)
ImpAndBravoWithHRM$ageInYears<-age_calc(na.omit(ImpAndBravoWithHRM$MainPtDataDateofBirth),ImpAndBravoWithHRM$MainPtDataDateofAdmission,precise=TRUE)/12

#Get all the Negative Impedance patients (with both positive and negative BRAVO results-#AcidRefluxBRAVOAv is the outcome variable we will use)
NegImp_FromImpWithBRavoAndHRM<-ImpAndBravoWithHRM %>% filter(AcidReflux_Imp==0)
#Also need to get rid of the non-acid reflux patients defined as >80 reflux episodes in a patient with normal AET.
NegImp_FromImpWithBRavoAndHRM<-NegImp_FromImpWithBRavoAndHRM%>% filter((MainRflxEpisodeTotalAcid+MainRflxEpisodeTotalNonacid)<80)

#Get rid of all the BRAVO columns as we don't need these as predictive variables
NegImp_FromImpWithBRavoAndHRM<-NegImp_FromImpWithBRavoAndHRM[ , !grepl( "^t[Dd]ay" , names( NegImp_FromImpWithBRavoAndHRM ) ) ]
NegImp_FromImpWithBRavoAndHRM<-NegImp_FromImpWithBRavoAndHRM[ , !grepl( "[Ss]wallo" , names( NegImp_FromImpWithBRavoAndHRM ) ) ]

#Add the BRAVO procedure information:
NegImp_FromImpWithBRavoAndHRM<-merge(NegImp_FromImpWithBRavoAndHRM, Procs,by=c("HospNum_Id","VisitDate.x"),all.x=TRUE)


NegImp_FromImpWithBRavoAndHRM$Oesophagitis<-ifelse(grepl("Y",NegImp_FromImpWithBRavoAndHRM$Oesophagitis),"Y","N")


#Impute the gender
NegImp_FromImpWithBRavoAndHRM$Gender<-ifelse(NegImp_FromImpWithBRavoAndHRM$Gender=="null",sample(c("Male","Female"), 1),NegImp_FromImpWithBRavoAndHRM$Gender)




##### BRAVO description #############################################
## @knitr BRAVOdescription
library(ggbeeswarm)
library(ggplot2)
library(gridExtra)

#Need to replicate Sweis paper and show 1. SAP worst day =63.2% 2. SI Worst day =61% 3. pH worst day <4 (>4.2%) =47% and most in the first 24 hours 4. 45% GERD based on worst day SAP or pH <4

#Positive GORD by pH<4 in 4.3% Worst day overall analysis-***
#pHWDA<-(nrow(NegImp_FromImpWithBRavoAndHRM[NegImp_FromImpWithBRavoAndHRM$worstt>=7.1,])/nrow(NegImp_FromImpWithBRavoAndHRM))*100

#Positive GORD by SAP Worst day overall analysis-
SAPWDA<-(nrow(NegImp_FromImpWithBRavoAndHRM %>%select(contains("SAP")) %>%filter_all(any_vars(. >94.9)))/nrow(NegImp_FromImpWithBRavoAndHRM))*100

#Positive GORD by SI Worst day overall analysis-
#SIWDA<-(nrow(NegImp_FromImpWithBRavoAndHRM %>%select(contains("SI")) %>%filter_all(any_vars(. >50)))/nrow(NegImp_FromImpWithBRavoAndHRM))*100

#NegImp_FromImpWithBRavoAndHRM<-merge(NegImp_FromImpWithBRavoAndHRM,MNBI,by="HospNum_Id",all.x=TRUE)
NegImp_FromImpWithBRavoAndHRM<-NegImp_FromImpWithBRavoAndHRM %>% left_join(MNBI, by = "HospNum_Id")





NegImp_FromImpWithBRavoAndHRM<-as_tibble(NegImp_FromImpWithBRavoAndHRM)

#Get rid of the patients Ismail identified as being poor quality:
NegImp_FromImpWithBRavoAndHRM<-NegImp_FromImpWithBRavoAndHRM[is.na(NegImp_FromImpWithBRavoAndHRM$comments),]

#Some some extra diagnoses for the paper

NegImp_FromImpWithBRavoAndHRM$Hypersensitive_vs_FD<-ifelse(NegImp_FromImpWithBRavoAndHRM$SAPTotalHeartburn>94.9&
                                                       !is.na(NegImp_FromImpWithBRavoAndHRM$SAPTotalHeartburn),"HypersensitiveOesophagus",
                                                            ifelse(NegImp_FromImpWithBRavoAndHRM$SAPTotalHeartburn<94.9&
                                                              !is.na(NegImp_FromImpWithBRavoAndHRM$SAPTotalHeartburn),"Functional Heartburn","NotHypersens"))
# DEFINITION OF POSITIVE WPM FOR GORD HERE
#How many days positive of the 4? (ie how many days with pH>5.3 on any one day)
#Using the average day analysis definition:
NegImp_FromImpWithBRavoAndHRMOnlyGORD<-NegImp_FromImpWithBRavoAndHRM[NegImp_FromImpWithBRavoAndHRM$AcidRefluxBRAVOAv==1,]

#Average day analysis definition:
#NegImp_FromImpWithBRavoAndHRMOnlyGORD<-NegImp_FromImpWithBRavoAndHRM[NegImp_FromImpWithBRavoAndHRM$average>5.2,]

#Need to divide the number of positive results by the number for each day that actually have a result
#(from ReflDay1FractionTimepHLessThan4Total",
#"ReflDay2FractionTimepHLessThan4Total","ReflDay1_2FractionTimepHLessThan4Total","ReflDay2_2FractionTimepHLessThan4Total"
#"ReflDay3_2FractionTimepHLessThan4Total" and use table(NegImp_FromImpWithBRavoAndHRM$ReflDay2_2FractionTimepHLessThan4Total,exclude = NULL))

#Get table with the actual day to day figures only for the positive patients:
DayToDayVar<-NegImp_FromImpWithBRavoAndHRMOnlyGORD%>%select(ReflDay1FractionTimepHLessThan4Total,ReflDay2FractionTimepHLessThan4Total,ReflDay1_2FractionTimepHLessThan4Total,ReflDay2_2FractionTimepHLessThan4Total,ReflDay3_2FractionTimepHLessThan4Total,ReflDay4_2FractionTimepHLessThan4Total,worstDaypH,averageAll)

#--------------------------------
library(matrixStats)
#Get rid of outliers:
DayToDayVar$row_std = rowSds(as.matrix(DayToDayVar[,c(1,2,3,4,5,6)]),na.rm=TRUE)
DayToDayVar$CV = 100*(DayToDayVar$row_std /DayToDayVar$averageAll)
#DayToDayVar<-DayToDayVar%>%filter(averageAll<20)%>%select(-averageAll)
#DayToDayVar<-DayToDayVar%>%filter(ReflDay2_2FractionTimepHLessThan4Total<20|NA)



DayToDayVar<-data.frame(DayToDayVar)
names(DayToDayVar)<-c("Day1","Day2","Day3","Day4","Day5","Day6","worstDay","averageAll","stdev","CV")

#For a quick ANOVA to see if there is any variability in the days with WOM:

data_long <- tidyr::gather(DayToDayVar, condition, measurement, Day1:Day6, factor_key=TRUE)


AOV<-aov(measurement~condition,data=data_long)
summary(AOV)
#See if can get some groupings:
#Graphs where there is a drop from day 1 to day 2:
DayToDayVar_earlydrop<-DayToDayVar%>%filter((Day2-Day1)<0)%>%select(-worstDay,-averageAll,-stdev,-CV)
DayToDayVar_earlyrise<-DayToDayVar%>%filter((Day2-Day1)>0)%>%select(-worstDay,-averageAll,-stdev,-CV)
DayToDayVar_day1_3drop<-DayToDayVar%>%filter((Day3-Day1)<0)%>%select(-worstDay,-averageAll,-stdev,-CV)
DayToDayVar_day1_3rise<-DayToDayVar%>%filter((Day3-Day1)>0)%>%select(-worstDay,-averageAll,-stdev,-CV)
DayToDayVar_day1_3rise<-DayToDayVar%>%filter((Day3-Day1)>0)%>%select(-worstDay,-averageAll,-stdev,-CV)

#Get the worst days
DayToDayVar_worst_is_Day1<-DayToDayVar%>%filter(worstDay==1)%>%select(-worstDay,-averageAll,-stdev)
DayToDayVar_worst_is_Day1_CV<-mean(DayToDayVar_worst_is_Day1$CV)
DayToDayVar_worst_is_Day2<-DayToDayVar%>%filter(worstDay==2)%>%select(-worstDay,-averageAll,-stdev)
DayToDayVar_worst_is_Day2_CV<-mean(DayToDayVar_worst_is_Day2$CV)
DayToDayVar_worst_is_Day3<-DayToDayVar%>%filter(worstDay==3)%>%select(-worstDay,-averageAll,-stdev)
DayToDayVar_worst_is_Day3_CV<-mean(DayToDayVar_worst_is_Day3$CV)
DayToDayVar_worst_is_Day4<-DayToDayVar%>%filter(worstDay==4)%>%select(-worstDay,-averageAll,-stdev)
DayToDayVar_worst_is_Day4_CV<-mean(DayToDayVar_worst_is_Day4$CV)
#DayToDayVar %>% mutate(mak=max(Day1,Day2,Day3,Day4,Day5,Day6))
library(reshape)
df_WorstDayTBB<-data.frame(x = seq_along(DayToDayVar[,1]),DayToDayVar)
df_WorstDayTBB <- melt(df_WorstDayTBB, id.vars = "x")
mydf<-df_WorstDayTBB%>%filter(grepl("worstDay",variable))
df_WorstDayTBB<-df_WorstDayTBB%>%left_join(mydf, by = "x")
df_WorstDayTBB<-df_WorstDayTBB%>%filter(!grepl("worst|averageAll|stdev|CV",variable.x))
df_WorstDayTBB$variable.x<-as.numeric(gsub("Day","",df_WorstDayTBB$variable.x))


library(ggplot2)
#Get rid of outliers and >day 4
df_WorstDayTBB<-df_WorstDayTBB%>%filter(df_WorstDayTBB$value.x<20)
df_WorstDayTBB<-df_WorstDayTBB%>%filter(df_WorstDayTBB$value.y<5)

WorstDaysPlot <- ggplot(df_WorstDayTBB, aes(x = variable.x, y = value.x))+
geom_line(aes(group = x, color = x))+
facet_grid(cols = vars(value.y))+
geom_smooth(method = "lm", formula = y ~ x, size = 1,aes(group=variable.y),color="red")+  xlab("Day") + ylab("AET (%)")








#Get the comparison of the lines:


WorstDaysPlot<-WorstDaysPlot+
  theme_Publication()+
  theme(legend.position='none')+
  theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
  theme(axis.text.x = element_text(angle = 0,vjust=1,hjust = 1,size = 8))+
  theme(axis.title.y = element_text(size = 8))+
  theme(axis.title.x = element_text(size = 12))+
  theme(strip.background = element_blank(), strip.placement = "outside",legend.title = element_blank())
#To Do- get the average of the line:



myworstDayTable<-data.frame(table(NegImp_FromImpWithBRavoAndHRMOnlyGORD$worstDaypH))
Day1TotNumPts<-myworstDayTable$Freq[1]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay1FractionTimepHLessThan4Total),])*100
Day2TotNumPts<-myworstDayTable$Freq[2]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay2FractionTimepHLessThan4Total),])*100
Day3TotNumPts<-myworstDayTable$Freq[3]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay1_2FractionTimepHLessThan4Total),])*100
Day4TotNumPts<-myworstDayTable$Freq[4]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay2_2FractionTimepHLessThan4Total),])*100
Day5TotNumPts<-myworstDayTable$Freq[5]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay3_2FractionTimepHLessThan4Total),])*100
myworstDayTable$Percentage<-c(Day1TotNumPts,Day2TotNumPts,Day3TotNumPts,Day4TotNumPts,Day5TotNumPts)
names(myworstDayTable)<-c("Day","Freq","Percentage")


a2<-ggplot(myworstDayTable,aes(x=Day,y=Percentage))+
  geom_histogram(stat="identity")+
  scale_colour_Publication()+
  theme_Publication()+
  xlab("Worst Day AET pH<4 ")+
  ylab("% of studies")+
  theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
  theme(axis.text.x = element_text(angle = 0,hjust = 1,size = 10))+
  theme(axis.title.y = element_text(size = 8))+
  theme(axis.title.x = element_text(size = 12))+
  labs(fill = "GERD")+
  scale_fill_discrete(name = "GERD", labels = c("No", "Yes"))


#This graph looks at the worst pH for WPM-GORD positive and WPM-GORD negative patients
#Cant seem to get rid of the outliers so have set the average limit to 25.
a3<-ggplot(NegImp_FromImpWithBRavoAndHRM%>%filter(worstt<25), aes(x = as.factor(AcidRefluxBRAVOAv),y=worstt,fill=as.factor(AcidRefluxBRAVOAv))) +
  geom_violin(opacity="50%")+
  geom_boxplot(outlier.shape = NA,width=0.4)+geom_beeswarm(size=0.3,priority='density',cex=1.5)+
  scale_x_discrete(labels=c("Negative","Positive"))+
  scale_colour_Publication()+
  theme_Publication()+
  theme(legend.position='none')+
  xlab("GERD")+
  ylab("Worst day AET")+
  theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
  theme(axis.text.x = element_text(angle = 0,hjust = 1,size = 10))+
  theme(axis.title.y = element_text(size = 8))+
  theme(axis.title.x = element_text(size = 12))+
  labs(fill = "GERD") +
  scale_fill_grey(name = "GERD", labels = c("No", "Yes"),start = 0.5, end = 0.8)

myNumDayspositiveTable<-data.frame(table(NegImp_FromImpWithBRavoAndHRMOnlyGORD$NumDaysBravoPositive))

Day1TotNumPosPts<-myNumDayspositiveTable$Freq[1]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay1FractionTimepHLessThan4Total),])*100
Day2TotNumPosPts<-myNumDayspositiveTable$Freq[2]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay1FractionTimepHLessThan4Total),])*100
Day3TotNumPosPts<-myNumDayspositiveTable$Freq[3]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay2FractionTimepHLessThan4Total),])*100
Day4TotNumPosPts<-myNumDayspositiveTable$Freq[4]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay1_2FractionTimepHLessThan4Total),])*100
Day5TotNumPosPts<-myNumDayspositiveTable$Freq[5]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay2_2FractionTimepHLessThan4Total),])*100
Day6TotNumPosPts<-myNumDayspositiveTable$Freq[6]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay3_2FractionTimepHLessThan4Total),])*100
Day7TotNumPosPts<-myNumDayspositiveTable$Freq[7]/nrow(NegImp_FromImpWithBRavoAndHRMOnlyGORD[!is.na(NegImp_FromImpWithBRavoAndHRMOnlyGORD$ReflDay4_2FractionTimepHLessThan4Total),])*100

myNumDayspositiveTable$Percentage<-c(Day1TotNumPosPts,Day2TotNumPosPts,Day3TotNumPosPts,Day4TotNumPosPts,Day5TotNumPosPts,Day6TotNumPosPts)
names(myNumDayspositiveTable)<-c("Day","Freq","Percentage")




a4<-ggplot(myNumDayspositiveTable[2:nrow(myNumDayspositiveTable),],aes(x=Day,y=Percentage))+
  geom_histogram(stat="identity")+
  scale_colour_Publication()+
  theme_Publication()+
  xlab("Number of days of GERD")+
  ylab("% of studies")+
  theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
  theme(axis.text.x = element_text(angle = 0,hjust = 1,size = 10))+
  theme(axis.title.y = element_text(size = 8))+
  theme(axis.title.x = element_text(size = 12))+
  labs(fill = "GERD") + scale_fill_discrete(name = "GORD", labels = c("No", "Yes"))
#+
#  theme(text=element_text(size=10, family="Arial"))+
 # ggsave("Figure1a.jpeg", width = 20, height = 20, dpi = 300, units="cm")


#Correlating the worst Day vs the number of positive days with BRAVO:

NumDays_vs_WorstDay<-merge(myNumDayspositiveTable[2:nrow(myNumDayspositiveTable),],myworstDayTable,by="Day")

NumDays_vs_WorstDayCorr<-cor.test(NumDays_vs_WorstDay$Percentage.x,NumDays_vs_WorstDay$Percentage.y)

############## Get the symptoms of the patients: ###########


interim<-strsplit(NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO,",")
interim<-lapply(interim,function(x)unique(x,","))
interim<-lapply(interim,function(x) sort(x))
new<-unlist(lapply(interim,function(x) paste0(x,collapse=",")))
NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO<-gsub("Epigastric","StomachPain",new)

BelchSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Belch",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
ChestPainSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("ChestPain",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
CoughSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Cough",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
HeartburnSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Heartburn",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
RegurgitationSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Regurgitation",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
ThroatSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Throat",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
VomitingSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Vomiting",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
NauseaSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("Nausea",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])
StomachPainSx<-nrow(NegImp_FromImpWithBRavoAndHRM[grepl("StomachPain",NegImp_FromImpWithBRavoAndHRM$AllSymps_BRAVO),])


Symps<-data.frame(BelchSx,ChestPainSx,CoughSx,HeartburnSx,RegurgitationSx,ThroatSx,VomitingSx,NauseaSx,StomachPainSx)



Sympdf<-gather(Symps, "Symptom", "Frequency", 1:ncol(Symps))
Sympdf$Percentage<-((Sympdf$Frequency/nrow(NegImp_FromImpWithBRavoAndHRM))*100)
Sympdf$Symptom<-gsub("Sx","",Sympdf$Symptom)



a5<-ggplot(Sympdf,aes(x=Symptom,y=Frequency)) +
  geom_histogram(stat="identity")+
  scale_colour_Publication()+
  theme_Publication()+
  xlab("Symptom")+
  ylab("% of studies")+
  theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
  theme(axis.text.x = element_text(angle = 30,vjust=1,hjust = 1,size = 8))+
  theme(axis.title.y = element_text(size = 8))+
  theme(axis.title.x = element_text(size = 12))
#+
  #theme(text=element_text(size=10, family="Arial"))+
  #ggsave("Figure1d.jpeg", width = 20, height = 20, dpi = 300, units="cm")

df_WorstDayTBB2<-df_WorstDayTBB%>%dplyr::rename(DayPlot=variable.x,AET=value.x,WorstDayGroup=value.y)%>%select(DayPlot,AET,WorstDayGroup)
my_means<-df_WorstDayTBB2%>%group_by(WorstDayGroup,DayPlot)%>%dplyr::summarize(Mean = mean(AET, na.rm=FALSE))
my_means<-data.frame(my_means)
my_means$WorstDayGroup<-as.character(my_means$WorstDayGroup)


compLines<-ggplot(data=my_means,aes(x=DayPlot,y=Mean,group=WorstDayGroup))+
  geom_smooth(method = "lm", formula = y ~ x, size = 1,aes(group=WorstDayGroup),color="red")+  xlab("Day") + ylab("AET (%)")+
  theme_Publication()+
  theme(axis.text.y = element_text(angle = 0,hjust = 1,size = 10))+
  theme(axis.text.x = element_text(angle = 0,vjust=1,hjust = 1,size = 8))+
  theme(axis.title.y = element_text(size = 8))+
  theme(axis.title.x = element_text(size = 12))+
  xlim(1,5)

library(patchwork)
patch<-(a4+a3)/(a2+a5)/(WorstDaysPlot+compLines)
patchwork<-patch & theme(legend.position='none')
#patchwork
patchwork+plot_annotation(tag_levels = c('A', '1'),
                 tag_suffix = '')+
theme(plot.tag.position = c(0, 0),
         plot.tag = element_text(size = 8, hjust = 0, vjust = 0))
#+
 # ggsave(here::here("inst","Projects","BRAVOStudies","NegImpPredictorsOfAllPosBRAVO","reports","Figure1_Complete.jpeg"), width = 20, height = 20, dpi = 300, units="cm")




##### Select columns with minimal data for data quality ############################################
## @knitr missingClean

#attr(NegImp_FromImpWithBRavoAndHRM$M2_noct, "label") <- "MethodTwo Nocturnal"

#Remove columns that are not meaningful:
#Impedance symptoms:
#Get all the MainAcidExposure/MainSx columns as your main impedance dataset. Note this also gets rid of composite score(=DeMeester) as it is derived from MainAcidExp so likely to correlate too much for regression

#NegImp_FromImpWithBRavoAndHRM$WorstD_ayAnalysisGORDPositive
NegImp_FromImpWithBRavoAndHRMMinSet<-NegImp_FromImpWithBRavoAndHRM[,grepl("ageInYears|MainAcidEx|MainSx|SxMain|HospNum_Id|DistalLESfromnarescm|Gender|DOBAge|Height|LESlengthcm|PIPfromnarescm|Hiatalhernia|BasalrespiratoryminmmHg|ResidualmeanmmHg|BasalrespiratorymeanmmHg|DistalcontractileintegralmeanmmHgcms|Contractilefrontvelocitycms|IntraboluspressureATLESRmmHg|Distallatency|failedChicagoClassification|panesophagealpressurization|largebreaks|Simultaneous|prematurecontraction|rapidcontraction|smallbreaks|VisitDate|Age|AcidReflux|Upright|Recumbent|SAP|M2_noct|M1_av|pspw|WorstD_|AllSymps_BRAVOgrouped|Oesophagitis|Grade|LESmidpointfromnarescm|MainRflxEpisodeTotalNonacid",names(NegImp_FromImpWithBRavoAndHRM))]

#attr(NegImp_FromImpWithBRavoAndHRMMinSet$M2_noct, "label") <- "MethodTwo Nocturnal"


#Also get rid of all columns containing unrelated symptoms and Nonacid and only look at the acid related things (so not "AllReflux")
NegImp_FromImpWithBRavoAndHRMMinSet<-NegImp_FromImpWithBRavoAndHRMMinSet[,!grepl("Unrelated|AllReflux",names(NegImp_FromImpWithBRavoAndHRMMinSet))]

#Remove RSSI as dont know what it is and SxCorr as it is just a number rather than a symptom score (RSI and SAP derived from it anyway)
NegImp_FromImpWithBRavoAndHRMMinSet<-NegImp_FromImpWithBRavoAndHRMMinSet[,!grepl("^SITotal|^SIDay|RSSI|SxCorr|ChannelTime|Duration|ClearanceTime|Composite|PP",names(NegImp_FromImpWithBRavoAndHRMMinSet))]

#Combine columns so they are more meaningful. Use the SI and SAP as potential parameters but only if significantly associated
#Combine symptoms so have only three categories- LPR (Throat/cough/Globus)/Oesophageal(Chest pain/regurgitation/belch/vomiting/heartburn)/Abdominal (Stomachpain)
#Also just code the symptoms as being present or not so if any of the members of the category are SAP >95% or SI >50% then it is a positive finding for that category


#Now get rid of BRAVO columns you dont need:
NegImp_FromImpWithBRavoAndHRMMinSet<-NegImp_FromImpWithBRavoAndHRMMinSet[,!grepl("Meds|UESMeanResidLocationcenterfrnarescm|ResidMeanbasalpressuremmHg|ResidMeanresidualpressuremmHg|MainProc|Catheter|NumDaysBravoPositive|Migration|HospNum_Id|HRM_Id|Physician|ReferringPhysician|Operator|BravoID|VisitDate|Stats|Procedure|FileCreationDate|TimeToNext|TimeSinceLast|BravoID|Imp_Id|^id$|PatientScore|MainPtData|GastricChannel|MainProcProcedureStart|MainProcProcedureDuration",names(NegImp_FromImpWithBRavoAndHRMMinSet))]


#Tells you which columns have the most missing data so they can be excluded
library(DataExplorer)
exclude_missing<-function(df){
  missing<-profile_missing(df)
  missing<-data.frame(missing,stringsAsFactors = FALSE)
  #Only <10% for each row missing data is accepted
  df_m<-missing[!grepl("Remove|Bad",missing$group),]
  #Pick the columns with good data in them from this set
  df<-df[,match(as.character(df_m$feature),names(df))]
  df<-data.frame(df,stringsAsFactors = FALSE)
  #3. Get some basic descriptive stats:
  #rr<-describe(df)
  #Select only the numeric variables
  #df<-dplyr::select_if(df, is.numeric)
  return(df)
}

#Only use columns with good quality data:
#NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal<-exclude_missing(NegImp_FromImpWithBRavoAndHRMMinSet)

NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal<-NegImp_FromImpWithBRavoAndHRMMinSet



############### Consort diagram ###############

library(DiagrammeR)
library(DiagrammeRsvg) #Needed if you want to export the image
#library(rsvg) #Needed if you want to export the image

#Set the values which will go into each label.
a1 <- paste0("a1: Number of  impedance studies =(",nrow(ImpAndhrm), ") \n Patients =",length(unique(ImpAndhrm$HospNum_Id)))
b1 <- ''
c1 <- ''
d1 <- paste0("d1: Number of Impedance studies \n negative for GORD=",nrow(ImpAndhrm[ImpAndhrm$AcidReflux_Imp==0,]), ")","\n Patients:",length(unique(ImpAndhrm[ImpAndhrm$AcidReflux_Imp==0,]$HospNum_Id)))
e1 <- paste0("e1: Number patients \n with negative impedance and subsequent BRAVO: " ,nrow(NegImp_FromImpWithBRavoAndHRM))


a2 <- ''
b2 <- ''
c2 <- ''
d2 <- ''
e2 <- ''

a3 <- ''
b3 <- ''
c3 <- ''
d3 <- ''
e3 <- ''

#Create a node dataframe
ndf <- create_node_df(
  n = 15,
  label = c(a1, b1, c1, d1, e1, #Column 1
            a2, b2, c2, d2, e2, #Column 2
            a3, b3, c3, d3, e3), #Column 2
  style = c('solid', 'invis', 'invis', 'solid', 'solid', #Column 1
            'invis', 'invis', 'invis', 'invis', 'invis', #Column 2
            'invis', 'invis', 'invis', 'invis', 'invis'), #Column 3
  shape = c('box', 'box', 'point', 'box', 'box', #Column 1
            'box', 'point', 'point', 'box', 'point', #Column 2
            'point', 'point', 'point', 'point', 'point'), #Column 3
  width = c(3, 0.001, 0.001, 3, 3, #Column 1
            0.001, 0.001, 0.001, 0.001, 0.001, #Column 2
            0.001, 0.001, 0.001, 0.001, 0.001), #Column 3
  height = c(1, 0.001, 0.001, 1, 1, #Column 1
             0.001, 0.001, 0.001, 0.001, 0.001, #Column 2
             0.001, 0.001, 0.001, 0.001, 0.001), #Column 3
  fontsize = c(rep(6, 1)),
  fontname = c(rep('Helvetica', 10)),
  penwidth = 1.5,
  fixedsize = 'true')

#Create an edge dataframe
edf <- create_edge_df(
  from = c(1,4#Horizontals
  ),
  to = c(4,5 #Horizontals
  ),
  arrowhead = c('normal', 'normal', 'none', 'none','none','none', #Column 1
                'none', 'none', 'normal', 'none', #Column 2
                'none', 'none' #Horizontals
  ),
  color = c('black', '#black', '#00000000', '#00000000', '#00000000', '#00000000', #Column 1
            '#00000000', '#00000000', '#00000000', '#00000000', #Column 2
            '#00000000', '#00000000' #Horizontals
  ),
  constraint = c(rep('true', 8), #Columns
                 rep('false', 2) #Horizontals
  )
)

g <- create_graph(ndf,
                  edf,
                  attr_theme = NULL)

render_graph(g)



######################  Day to Day ANOVA ########################################
## @knitr Day2DayANOVA

library(ez)
#res.aov <- anova_test(data = my_means, dv = score, wid = id, within = time)
#get_anova_table(res.aov)
my_means=my_means[my_means$DayPlot!=6,]
repeat1<-ezANOVA(data=my_means ,dv=.(Mean),wid=.(WorstDayGroup),within=.(DayPlot),type=3)





#library(lsmeans)

#m.interaction <- lm(Mean~WorstDayGroup*DayPlot,data=my_means)
#myDayToDayAnova<-anova(m.interaction)
#m.interaction$coefficients
#m.lst <- lstrends(m.interaction, "WorstDayGroup", var="DayPlot")
#comparisonRegressionLines<-pairs(m.lst)

#myDayToDayAnovadf<-data.frame(myDayToDayAnova)
#dayToDayAnovaTable<-flextable(myDayToDayAnovadf %>% tibble::rownames_to_column("column name"))%>%
#  set_caption(caption = "Table 1: Example") %>%
  #font(fontname = "Calibri (Body)", part = "all") %>%
  #fontsize(size = 10, part = "body") %>%
  # add footer if you want
  # add_footer_row(values = "* p < 0.05. ** p < 0.01. *** p < 0.001.",
  #                colwidths = 4) %>%
#  theme_booktabs() %>% # default theme
#  autofit()

#dayToDayAnovaTable

######################  Univariate Anaylsis OR ########################################
## @knitr HRM_UnivariateAnalysisOR




NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal$AcidRefluxBRAVOAv<-factor(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal$AcidRefluxBRAVOAv,
                                                                        levels=c(0,1),
                                                                        labels=c("Negative", # Reference
                                                                                 "Positive"))

NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal$WorstD_ayAnalysisGORDPositive<-factor(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal$WorstD_ayAnalysisGORDPositive,
                                                                          levels=c(0,1),
                                                                          labels=c("Negative", # Reference
                                                                                   "Positive"))



#Rearrange columns so can do the univariate analysis more efficiently:
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal %>% select(AcidRefluxBRAVOAv,1:12, everything())


#Create labels
#MNBI$M2_noct<-as.numeric(MNBI$M2_noct)
#MNBI$M1_av<-as.numeric(MNBI$M1_av)

#For the table get rid of variables you dont want to see
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal %>% dplyr::select(
  #-LOS_relax,
  -DOBAge,
  #-LowerOesoph,
 # -AllSymps_BRAVOcompartment,
 # -EsophageallengthLESUEScenterscm,
  #-ProximalLESfromnarescm,
  -DistalLESfromnarescm,
  -LESlengthcm,
  -Height,
  #-MainRflxEpisodeTotalAcid,
  -Simultaneous,
  -prematurecontraction,
  -rapidcontraction,
   -PIPfromnarescm,
   dplyr::contains("Oesophagitis"),
# dplyr::contains("M2_noct"),
 dplyr::contains("M1_av"),
   dplyr::contains("Method")
  -dplyr::contains("TotalAcid"),
  -dplyr::contains("SAPDay"),
  -dplyr::contains("SAP",ignore.case = FALSE),
  -dplyr::contains("SI",ignore.case = FALSE),
  -dplyr::contains("SAPTotal"),
  -dplyr::contains("SxMain"),
  -dplyr::contains("MainSx"),
  -IntraabdominalLESlengthcm,
  -Hiatalhernia,
 # -Findings,
  -BasalrespiratorymeanmmHg,
  -IntraboluspressureATLESRmmHg,
  # -DistalcontractileintegralhighestmmHgcms,
  # -AcidRefluxBRAVOTotalOnly,
  #-AllSymps_Impgrouped,
  #-AllSymps_BRAVO,
  #-AllImpSymptom,
  -AcidReflux_Imp,
  -Age,
   matches("SAPTotalHeartburn|SAPTotalChestPain|SAPTotalRegurgitation|SAPTotalBelch|SAPTotalCough|SAPTotalThroat|SAPTotalEpigastric|SAPTotalVomiting|SAPTotalNausea"),
  -matches("SAPTotalOther|SAPTotalPostPr|SAPTotalMeal"),
  -SAPOesophageal,
  -SAPOther,
  -SAPLPR,
  #-SIOesophageal,
  #-SIOther,
  #-SILPR,
  -contains("SAP"),
  -contains(".y"),
  -matches("worstt|worstDaypH|Day2|ReflD|DeMeester|Recumbent|Prox|Upright|Day1Pos|Day2Pos|Day3Pos|Day4Pos|Day5Pos|Day6Pos|Trajec")
  #-average,
  #-averageAll
 )

#Do some imputation for missing data:
#library(mice)

#imputed<-mice(data = NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2, m = 5, method = "pmm", maxit = 50, seed = 500)
#NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2<-complete(imputed)

#attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$M2_noct, "label") <- "MNBI Nocturnal"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$M1_av, "label") <- "MNBI"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$ageInYears, "label") <- "Age (years)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$Oesophagitis, "label") <- "Oesophagitis"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$DistalcontractileintegralmeanmmHgcms, "label") <- "DCI (mean mmHg/cm/s)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$Contractilefrontvelocitycms, "label") <- "Contractile front velocity (m/s)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$Distallatency, "label") <- "Distal latency (s)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$panesophagealpressurization, "label") <- "% Panoesophageal pressurization"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$largebreaks, "label") <- "% Large Breaks"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$smallbreaks, "label") <- "% Small breaks"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MainRflxEpisodeTotalNonacid, "label") <- "Number of Non acid episodes"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MainAcidExpTotalClearanceChannelNumberofAcidEpisodes, "label") <- "Number of Acid Episodes"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MainAcidExpTotalClearanceChannelPercentTime, "label") <- "AET"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MainAcidExpTotalClearanceChannelLongestEpisode, "label") <- "Longest acid episode"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$LESmidpointfromnarescm, "label") <- "LES midpoint from nares (cm)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$BasalrespiratoryminmmHg, "label") <- "Basal resp. min. (mmHg)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$ResidualmeanmmHg, "label") <- "Residual mean (mmHg)"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$failedChicagoClassification, "label") <- "% Failed peristalsis"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$AllSymps_BRAVOgrouped, "label") <- "All symptoms grouped"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$Oesophagitis, "label") <- "Endoscopic Findings"
attr(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$pspw_index, "label") <- "PSPWI"


#Symptom cleaning for analysis: Need to have as yes no answers rather than actual SAPs
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalHeartburn<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalHeartburn),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalChestPain<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalChestPain),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalRegurgitation<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalRegurgitation),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalBelch<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalBelch),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalCough<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalCough),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalThroat<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalThroat),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalEpigastric<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalEpigastric),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalVomiting<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPTotalVomiting),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPOesophageal<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPOesophageal),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPOther<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPOther),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPLPR<-ifelse(!is.na(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$SAPLPR),"Y","N")
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MethodOne_average<-as.numeric(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MethodOne_average)
# NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MethodTwo_WholeNocturnalPeriod<-as.numeric(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$MethodTwo_WholeNocturnalPeriod)



#Get rid of rows where the impedance data is missing following the merge:

#Make sure the group to compare is the first column and then list the last column as the final variable to compare for each group:
library(compareGroups)
resOR <- compareGroups(AcidRefluxBRAVOAv ~ . , NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2, compute = TRUE)


########### Univariate predictors value #############################################
## @knitr UnivariatePredictorsValue
#Rearrange columns so age and gender are together
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2 %>% select(AcidRefluxBRAVOAv,ageInYears,1:12, everything())


#Remove some columns that arent for this table but are for the WDA analysis
NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal3<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2%>%select(-contains("worst"))


library(gtsummary)
so<-tbl_summary(
  data = NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal3,
  by = AcidRefluxBRAVOAv,missing = "no",
  type = list(all_numeric() ~ "continuous"),
  # change statistics printed in table
  statistic = list(all_continuous() ~ "{mean} ({sd})",
                   all_categorical() ~ "{n} / {N} ({p}%)"),
  digits = list("marker" ~ c("Y", "N"))
)
# add p-values, report t-test, round large pvalues to two decimal place
so<-add_p(so,test = list(vars(marker) ~ "t.test"),
          pvalue_fun = function(x) style_pvalue(x, digits = 2))

# bold variable labels, italicize levels
so<- bold_labels(so)
so<-italicize_levels(so)
# bold p-values under a given threshold (default is 0.05)
so<-bold_p(so,t = 0.05)
# include percent in headers
so<-modify_header(so,stat_by = "**{level}**, N = {n} ({style_percent(p, symbol = TRUE)})")
as_gt(so)

#Choose variables automatically from the univariate analysis:


sodf<-data.frame(so$meta_data,stringsAsFactors = FALSE)
sodf<-sodf%>%select(var_label,p.value)%>%filter(p.value < 0.5e-01)



NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal4<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2%>%select(-"AcidRefluxBRAVOAv")


soWDA<-tbl_summary(

  data = NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal4,
  by = WorstD_ayAnalysisGORDPositive,missing = "no",
  type = list(all_numeric() ~ "continuous"),
  # change statistics printed in table
  statistic = list(all_continuous() ~ "{mean} ({sd})",
                   all_categorical() ~ "{n} / {N} ({p}%)"),
  digits = list("marker" ~ c("Y", "N"))
)
# add p-values, report t-test, round large pvalues to two decimal place
soWDA<-add_p(soWDA,test = list(vars(marker) ~ "t.test"),
          pvalue_fun = function(x) style_pvalue(x, digits = 2))

# bold variable labels, italicize levels
soWDA<- bold_labels(soWDA)
soWDA<-italicize_levels(soWDA)
# bold p-values under a given threshold (default is 0.05)
soWDA<-bold_p(soWDA,t = 0.05)
# include percent in headers
soWDA<-modify_header(soWDA,stat_by = "**{level}**, N = {n} ({style_percent(p, symbol = TRUE)})")
as_gt(soWDA)

#Choose variables automatically from the univariate analysis:
soWDA_df<-data.frame(soWDA$meta_data,stringsAsFactors = FALSE)
soWDA_df<-soWDA_df%>%select(var_label,p.value)%>%filter(p.value < 0.5e-01)


options(gtsummary.as_gt.addl_cmds = "gt::tab_options(table.font.size = 'x-small', data_row.padding = gt::px(1))")

soWDAandAV<-tbl_merge(list(so,soWDA),tab_spanner = c("Average day analysis", "Worst day analysis"))
soWDAandAV




#Need to choose more variables here from the univariate above based on the p--value <0.05
chosen<-data.frame(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$AcidRefluxBRAVOAv, NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2[, Hmisc::label(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2) %in% sodf$var_label])
chosen<-chosen %>% dplyr::rename(AcidRefluxBRAVOAv = NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2.AcidRefluxBRAVOAv)

#Need to choose more variables here from the univariate above using average day analysis based on the p--value <0.05
chosenWDA<-data.frame(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2$WorstD_ayAnalysisGORDPositive, NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2[, Hmisc::label(NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2) %in% soWDA_df$var_label])
chosenWDA<-chosenWDA %>% dplyr::rename(WorstD_ayAnalysisGORDPositive = NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal2.WorstD_ayAnalysisGORDPositive)





########### Univariate predictors graph #############################################
## @knitr UnivariatePredictorsGraph


library(ggplot2)
p1<-ggplot(chosen, aes(x = AcidRefluxBRAVOAv, y = chosen[,2])) +
  geom_violin(outlier.shape = NA,fill='red',alpha=0.3)+geom_beeswarm(size=0.5,priority='density',cex = 2)+
  geom_boxplot(width=0.1,alpha=0.4)+
  scale_colour_Publication()+
  theme_Publication()+
  ylab(Hmisc::label(chosen[2]))+
  xlab("WPM-GERD")
#+
 # ggsave("Figure2a.jpeg", width = 20, height = 20, dpi = 300, units="cm")

p2<-ggplot(chosen, aes(x = AcidRefluxBRAVOAv, y = chosen[,3])) +
  geom_violin(outlier.shape = NA,fill='red',alpha=0.3)+geom_beeswarm(size=0.5,priority='density',cex = 2)+
  geom_boxplot(width=0.1,alpha=0.4)+
  scale_colour_Publication()+
  theme_Publication()+
  ylab(Hmisc::label(chosen[3]))+
  xlab("WPM-GERD")
#+
 # ggsave("Figure2b.jpeg", width = 20, height = 20, dpi = 300, units="cm")

p3<-ggplot(chosen, aes(x = AcidRefluxBRAVOAv, y = chosen[,4])) +
  geom_violin(outlier.shape = NA,fill='red',alpha=0.3)+geom_beeswarm(size=0.5,priority='density',cex = 2)+
  geom_boxplot(width=0.1,alpha=0.4)+
  scale_colour_Publication()+
  theme_Publication()+
  ylab(Hmisc::label(chosen[4]))+
  xlab("WPM-GERD")
#+
 # ggsave("Figure2c.jpeg", width = 20, height = 20, dpi = 300, units="cm")

#

  p4<-ggplot(chosen, aes(y = AcidRefluxBRAVOAv)) +
    geom_bar(aes(fill = chosen[,5]),position="fill") +
    scale_colour_Publication()+
    theme_Publication()+
    xlab("Number of studies")+
    ylab("GERD")+
    labs(fill = "Oesophagitis") + scale_fill_discrete(name = "Oesophagitis", labels = c("No", "Yes"))


   #theme(text=element_text(size=10, family="Arial")) +
   #ggsave("Figure2c.jpeg", width = 20, height = 20, dpi = 300, units="cm")


ggarrange(p1,p2,p3,p4,
          labels = c("a)","b)","c)"),
          nrow = 2,
          ncol = 2)+
  theme(text=element_text(size=6, family="Arial"))
 #ggsave("Figure2_Complete.jpeg", width = 20, height = 20, dpi = 300, units="cm")


########### Multiple Logistic Regression2 #############################################
## @knitr HRM_MultipleLogRegression2


#To get univariate and multivariate odds ratios:
library(finalfit)
chosen$AcidRefluxBRAVOAv<-as.factor(chosen$AcidRefluxBRAVOAv)

#Note for some reason I cant get Age and SAP ones in here
explanatory = names(chosen[,!grepl("AcidRefluxBRAVOAv",names(chosen))])
dependent = 'AcidRefluxBRAVOAv'


mytable<-chosen %>%
  finalfit.glm(dependent, explanatory)


#convert oesophagitis to numeric
chosen$Oesophagitis<-ifelse(grepl("Y",chosen$Oesophagitis),1,0)

mod1 <- glm(chosen$AcidRefluxBRAVOAv ~ ., data = dplyr::select_if(chosen, is.numeric), family = binomial(link = "logit"))
AvReg<-tbl_regression(mod1, exponentiate = TRUE)%>%bold_p(t = 0.05)


#Choose variables automatically from the univariate analysis:
AvReg_df<-data.frame(AvReg$table_body,stringsAsFactors = FALSE)
AvReg_df<-AvReg_df%>%select(variable,p.value)%>%filter(p.value < 0.5e-01)


####### For WDA:
#convert oesophagitis to numeric
chosenWDA$Oesophagitis<-ifelse(grepl("Y",chosenWDA$Oesophagitis),1,0)
chosenWDA$WorstD_ayAnalysisGORDPositive<-as.factor(chosenWDA$WorstD_ayAnalysisGORDPositive)

#Note for some reason I cant get Age and SAP ones in here
explanatory = names(chosenWDA[,!grepl("WorstD_ayAnalysisGORDPositive",names(chosenWDA))])
dependent = 'WorstD_ayAnalysisGORDPositive'


mytableWDA<-chosenWDA %>%
  finalfit.glm(dependent, explanatory)


#convert oesophagitis to numeric
#chosenWDA$Oesophagitis<-ifelse(grepl("Y",chosenWDA$Oesophagitis),1,0)

mod1WDA <- glm(chosenWDA$WorstD_ayAnalysisGORDPositive ~ ., data = dplyr::select_if(chosen, is.numeric), family = binomial(link = "logit"))
WDAReg<-tbl_regression(mod1WDA, exponentiate = TRUE)%>%bold_p(t = 0.051)
tbl_merge(list(AvReg,WDAReg),tab_spanner = c("Average day analysis", "Worst day analysis"))



########## Cut Point For pH <4 #############################################
## @knitr CutPointForAcid

chosen$AcidRefluxBRAVOAv<-as.character(chosen$AcidRefluxBRAVOAv)
#Determine optimal cutpoints for the Percent time <4 (the last column in the chosen table)
library(cutpointr)
vb<-cutpointr(x = chosen[,4], class = chosen$AcidRefluxBRAVOAv,na.rm=TRUE,method = maximize_metric, metric = sum_sens_spec)
dens1<-plot_x(vb)+geom_density(color="white", fill="grey")+
  xlab(Hmisc::label(names(chosen[ncol(chosen)])))+
  ggtitle(paste0(""))+
  labs(subtitle = "")+
theme(text=element_text(size=10, family="Arial"))
#ggsave("Figure3a.jpeg", width = 20, height = 20, dpi = 300, units="cm")
roc1<-plot_roc(vb)+ ggtitle(paste0(""))+
theme(text=element_text(size=10, family="Arial"))
#ggsave("Figure3b.jpeg", width = 20, height = 20, dpi = 300, units="cm")

# Look at the difference between the diagnoses of reflux with worst and average day analysis:
newout<-NegImp_FromImpWithBRavoAndHRMMinSetQualityFinal
newout$WorstD_ayAnalysisGORDPositive<-gsub("Negative",0,newout$WorstD_ayAnalysisGORDPositive)
newout$WorstD_ayAnalysisGORDPositive<-gsub("Positive",1,newout$WorstD_ayAnalysisGORDPositive)
newout$WorstD_ayAnalysisGORDPositive<-as.numeric(newout$WorstD_ayAnalysisGORDPositive)

newout$AcidRefluxBRAVOAv<-gsub("Negative",0,newout$AcidRefluxBRAVOAv)
newout$AcidRefluxBRAVOAv<-gsub("Positive",1,newout$AcidRefluxBRAVOAv)
newout$AcidRefluxBRAVOAv<-as.numeric(newout$AcidRefluxBRAVOAv)
newout$DiagnosticDifference<-newout$WorstD_ayAnalysisGORDPositive-newout$AcidRefluxBRAVOAv

#Just pick the ones that are different:
DiffDxPatients<-newout[newout$DiagnosticDifference!=0,]

DiffDxPatients<-DiffDxPatients%>%select(AcidRefluxBRAVOAv,DistalLESfromnarescm,Gender,DOBAge,
                        AllSymps_BRAVOgrouped,MainAcidExpTotalClearanceChannelPercentTime,
                        MainAcidExpTotalClearanceChannelNumberofAcidEpisodes,AcidReflux_Imp,
                        ageInYears,Oesophagitis,Grade,M1_av,pspw_index,DiagnosticDifference)



########## diagrammeR #############################################
## @knitr diagrammeR



#Other analyses:

#Day by day analysis to see if patients more likely to have intermittent symptoms especially for reflux and heartburn using the BRAVO results
#Compare BRAVO positive with BRAVO negative
#Are the hypersensitives also the ones with high AET. ie do HOvsHighAET NoHovs HighAET HOvsLowAET and NoHOvsLowAET and compare the groups (high AET is > optimal cutpoint)
sebastiz/PhysiMineR documentation built on Oct. 3, 2023, 3:46 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
sebastiz/PhysiMineR
Functions to extract, clean, manipulate and analyse upper gastrointestinal physiological parameters

inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/munge/PreProcessing.R
In sebastiz/PhysiMineR: Functions to extract, clean, manipulate and analyse upper gastrointestinal physiological parameters

R Package Documentation

Browse R Packages

We want your feedback!

sebastiz/PhysiMineR Functions to extract, clean, manipulate and analyse upper gastrointestinal physiological parameters

inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/munge/PreProcessing.R In sebastiz/PhysiMineR: Functions to extract, clean, manipulate and analyse upper gastrointestinal physiological parameters

R Package Documentation

Browse R Packages

We want your feedback!

sebastiz/PhysiMineR
Functions to extract, clean, manipulate and analyse upper gastrointestinal physiological parameters

inst/Projects/BRAVOStudies/NegImpPredictorsOfAllPosBRAVO/munge/PreProcessing.R
In sebastiz/PhysiMineR: Functions to extract, clean, manipulate and analyse upper gastrointestinal physiological parameters