# Copyright 2020 Observational Health Data Sciences and Informatics
#
# This file is part of CancerTxPathway
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' Incidence comparison plot
#'
#' Compare the incidence of events across treatment cycles. Target (treatment)
#' cycle records and event records are interleaved per subject in date order;
#' an event record is attributed to the record that immediately precedes it
#' when both start within \code{eventPeriod} days of each other. For every
#' cohort/cycle combination the function reports the number of target records
#' (\code{total}), the number of attributed events (\code{event}) and their
#' ratio, optionally writes that table to a CSV file, and draws a two-panel
#' figure (incidence rate on top, counts below).
#'
#' @param connectionDetails Connection details, passed unchanged to
#'   \code{cohortCycle()} and \code{cohortRecords()}.
#' @param cohortDatabaseSchema Schema name, passed unchanged to
#'   \code{cohortCycle()} and \code{cohortRecords()}.
#' @param cohortTable Cohort table name, passed unchanged to
#'   \code{cohortCycle()} and \code{cohortRecords()}.
#' @param outputFolder Folder in which the summary CSV is written. When
#'   \code{NULL} (default) no file is written.
#' @param outputFileTitle Prefix of the CSV file name; the file is named
#'   \code{<outputFileTitle>_EventIncidenceInCycle.csv}.
#' @param targetCohortIds Cohort ids of the target (treatment) cohorts, passed
#'   to \code{cohortCycle()}.
#' @param conditionCohortIds Cohort ids of the condition cohorts, passed to
#'   \code{cohortCycle()}.
#' @param eventCohortIds Cohort ids of the event cohorts, passed to
#'   \code{cohortRecords()}.
#' @param restrictInitialSeries If \code{TRUE} (default), keep only the target
#'   records that belong to each subject's first series, i.e. the rows from the
#'   subject's earliest \code{cycle == 1} record up to the next
#'   \code{cycle == 1} record.
#' @param restricInitialEvent If \code{TRUE} (default), keep only the earliest
#'   qualifying event per subject. (The parameter name is spelled this way in
#'   the public interface.)
#' @param identicalSeriesCriteria Passed to \code{cohortCycle()}; default 60.
#' @param eventPeriod Maximum number of days between the start of an event
#'   record and the start of the record preceding it for the event to be
#'   counted; default 30.
#' @param minSubject Cohort/cycle combinations with fewer than this many target
#'   records are dropped from the output; default 0.
#' @keywords Incidence
#' @return The object returned by \code{gridExtra::grid.arrange()} for the two
#'   stacked plots (incidence rate and incidence counts).
#' @examples
#' \dontrun{
#' # All values below are placeholders; connectionDetails is created beforehand.
#' cycleIncidencePlot(connectionDetails = connectionDetails,
#'                    cohortDatabaseSchema = "my_schema",
#'                    cohortTable = "my_cohort_table",
#'                    targetCohortIds = c(1, 2),
#'                    conditionCohortIds = 100,
#'                    eventCohortIds = 200)
#' }
#' @import dplyr
#' @import reshape2
#' @import ggplot2
#' @import scales
#' @import gridExtra
#' @import viridis
#' @export cycleIncidencePlot
cycleIncidencePlot <- function(connectionDetails,
                               cohortDatabaseSchema,
                               cohortTable,
                               outputFolder = NULL,
                               outputFileTitle = NULL,
                               targetCohortIds,
                               conditionCohortIds,
                               eventCohortIds,
                               restrictInitialSeries = TRUE,
                               restricInitialEvent = TRUE,
                               identicalSeriesCriteria = 60,
                               eventPeriod = 30,
                               minSubject = 0) {
  # Cohort information (joined onto the event records below)
  cohortDescript <- cohortDescription()

  # Pull the target (treatment) cycle records
  targetCohort <- cohortCycle(connectionDetails,
                              cohortDatabaseSchema,
                              cohortTable,
                              targetCohortIds,
                              identicalSeriesCriteria,
                              conditionCohortIds)

  # Restrict to the initial series of every subject
  if (restrictInitialSeries) {
    # Number each subject's cycle-1 records in date order
    cohortFirstIndex <- targetCohort %>%
      subset(cycle == 1) %>%
      arrange(subjectId, cohortStartDate) %>%
      group_by(subjectId) %>%
      mutate(index = row_number())
    # Natural join on all shared columns: only cycle-1 rows receive an index
    indexedCohort <- left_join(targetCohort, cohortFirstIndex)
    # Carry the series index forward onto the following (cycle > 1) rows.
    # NOTE(review): this relies on the row order returned by cohortCycle()
    # placing each cycle-1 row directly before its later cycles - confirm.
    indexedCohort$index <- data.table::nafill(indexedCohort$index, type = "locf")
    targetCohort <- indexedCohort %>%
      subset(index == 1) %>%
      select(-index)
  }

  # Pull the event records and attach the cohort description
  eventCohort <- cohortRecords(connectionDetails,
                               cohortDatabaseSchema,
                               cohortTable,
                               eventCohortIds)
  eventCohort <- dplyr::left_join(eventCohort,
                                  cohortDescript,
                                  by = c("cohortDefinitionId" = "cohortDefinitionId"))
  # Events get cycle 0 and are limited to subjects present in the target
  # cohort; de-duplication happens before conceptId is dropped.
  eventCohort <- eventCohort %>%
    mutate(cycle = 0) %>%
    select(-type) %>%
    subset(subjectId %in% targetCohort$subjectId) %>%
    unique() %>%
    select(-conceptId)

  # Label every record as "<cycle>_<cohortName>"
  collapsedCohort <- rbind(targetCohort, eventCohort) %>%
    arrange(subjectId, cohortStartDate) %>%
    mutate(cohort_cycle = paste0(cycle, "_", cohortName))

  # Attach the label and start date of the preceding record of the same subject
  collapsedCohort <- collapsedCohort %>%
    arrange(subjectId, cohortStartDate, desc(cohort_cycle)) %>%
    group_by(subjectId) %>%
    mutate(prev_c_n_c = lag(cohort_cycle)) %>%
    mutate(prevDate = lag(cohortStartDate)) %>%
    ungroup()

  # Events whose preceding record is a different cohort/cycle ...
  eventCandidates <- collapsedCohort %>%
    subset(cohortName %in% unique(eventCohort$cohortName)) %>%
    subset(cohort_cycle != prev_c_n_c) %>%
    na.omit()
  # ... and that start within eventPeriod days of it. The unit is pinned to
  # days so the comparison does not depend on the auto-selected difftime unit.
  eventAfterTarget <- eventCandidates %>%
    subset(as.numeric(difftime(cohortStartDate, prevDate, units = "days")) <= eventPeriod) %>%
    unique()

  # Optionally keep only the first qualifying event per subject
  if (restricInitialEvent) {
    eventAfterTarget <- eventAfterTarget %>%
      arrange(subjectId, cohortStartDate) %>%
      group_by(subjectId) %>%
      slice(1)
  }

  # Count events per preceding cohort/cycle and target records per cohort/cycle
  summariseEvent <- eventAfterTarget %>%
    group_by(prev_c_n_c) %>%
    unique() %>%
    summarise(n = n())
  summariseTarget <- targetCohort %>%
    mutate(cohort_cycle = paste0(cycle, "_", cohortName)) %>%
    group_by(cohort_cycle) %>%
    unique() %>%
    summarise(n = n())

  # Collapse summarised data; cohort/cycles without any event get 0
  collapsedSummarise <- left_join(summariseTarget,
                                  summariseEvent,
                                  by = c("cohort_cycle" = "prev_c_n_c"))
  colnames(collapsedSummarise) <- c("cohort_cycle", "total", "event")
  collapsedSummarise <- as.data.frame(collapsedSummarise)
  collapsedSummarise[is.na(collapsedSummarise)] <- 0

  # Keep only cohort/cycles with at least minSubject target records
  collapsedSummarise <- collapsedSummarise %>%
    subset(total >= minSubject)

  # Lookup from the combined label back to cohort name and cycle number
  seperateNameIndex <- targetCohort %>%
    mutate(cohort_cycle = paste0(cycle, "_", cohortName)) %>%
    select(cohortName, cycle, cohort_cycle) %>%
    unique()

  # Plot data (cohort_cycle is the only column shared by the two tables)
  plotData <- left_join(collapsedSummarise, seperateNameIndex, by = "cohort_cycle") %>%
    mutate(ratio = event / total) %>%
    select(cycle, cohortName, event, total, ratio, cohort_cycle) %>%
    arrange(cohortName, cycle)

  # Optionally export the summary table
  if (!is.null(outputFolder)) {
    fileName <- paste0(outputFileTitle, "_", "EventIncidenceInCycle.csv")
    write.csv(plotData, file.path(outputFolder, fileName), row.names = FALSE)
  }

  # Rows with missing values are excluded from both plots
  plotRows <- na.omit(plotData)

  # plot #1 - Incidence Rate
  p1 <- ggplot(plotRows, aes(x = cohort_cycle, y = ratio, group = cohortName, color = cohortName)) +
    theme_bw() +
    scale_x_discrete(limits = plotRows$cohort_cycle) +
    geom_point(size = 2, aes(fill = cohortName)) +
    geom_text(aes(label = percent(round(ratio, 2))),
              size = 4, hjust = -0.01, vjust = -0.2, fontface = "plain") +
    geom_smooth(size = 1.0, method = "lm", aes(fill = cohortName)) +
    scale_fill_viridis(discrete = TRUE) +
    scale_color_viridis(discrete = TRUE) +
    theme(legend.position = "none",
          plot.title = element_text(size = 20, face = "bold", vjust = 2),
          axis.title.x = element_blank(),
          axis.title.y = element_text(size = 13),
          axis.text = element_blank(),
          axis.ticks.x = element_blank(),
          axis.ticks.y = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()) +
    labs(y = "Incidence Rate") +
    ggtitle("Event Incidence Rate - Cycle plot")

  # plot #2 - number of Incidence (light bar = total, dark bar = events)
  p2 <- ggplot(plotRows, aes(x = cohort_cycle, group = cohortName, color = cohortName)) +
    theme_bw() +
    scale_x_discrete(limits = plotRows$cohort_cycle) +
    geom_bar(aes(y = total, fill = cohortName), alpha = 0.2, stat = "identity", show.legend = TRUE) +
    geom_bar(aes(y = event, fill = cohortName), alpha = 0.6, stat = "identity", show.legend = FALSE) +
    geom_text(aes(y = total, label = paste(event, "/", total)),
              angle = 90, size = 4, hjust = -0.1, vjust = 0.4, check_overlap = TRUE,
              show.legend = FALSE, position = position_dodge(width = 0.6)) +
    geom_text(aes(y = 0, label = cycle),
              angle = 90, size = 4, hjust = 1.25, vjust = 0.4, check_overlap = TRUE,
              show.legend = FALSE) +
    scale_fill_viridis(discrete = TRUE) +
    scale_color_viridis(discrete = TRUE) +
    theme(legend.position = "bottom",
          legend.title = element_blank(),
          plot.title = element_text(size = 20, vjust = 2),
          axis.title.x = element_text(size = 13),
          axis.title.y = element_text(size = 13),
          axis.text = element_blank(),
          axis.ticks.x = element_blank(),
          axis.ticks.y = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()) +
    # Leave 10% head-room above the tallest bar for the "event / total" labels
    scale_y_continuous(limits = c(0, max(plotData$total * 1.1))) +
    labs(x = "Iteration Number of treatment", y = "Number of Incidence")

  # multiplot: rate plot stacked above the count plot
  p <- grid.arrange(p1, p2, ncol = 1)
  p
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.