# Copyright 2020 Observational Health Data Sciences and Informatics
#
# This file is part of PathwayVisualizer
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#' @import data.table
#' @import dplyr
#' @export
plotRaw_3 <- function(connectionDetails,
cohortDatabaseSchema,
cohortTable,
eventCohortIds,
conditionCohortIds = NULL,
numberedCohort,
cohortDescript,
combinationWindow,
maximumPathLength,
minimumPathLength,
minimumCellCount,
outputFileTitle,
outputFolderPath,
saveFile = TRUE){
# 1. Usage pattern graph
# 2. Treatment Iteration heatmap
# 3. Treatment Pathway - including table
if(!is.null(conditionCohortIds)){
conditionCohort <- loadCohort(connectionDetails,
cohortDatabaseSchema,
cohortTable,
conditionCohortIds)
numberedCohort <- numberedCohort %>% subset(subjectId %in% conditionCohort$subjectId)
}
numberedCohort <- numberedCohort %>% subset(cycle == 1)
cohortData <- numberedCohort %>% select(-cohortName,-cycle)
cohortData$cohortStartDate <- as.Date(cohortData$cohortStartDate)
cohortData$cohortEndDate <- as.Date(cohortData$cohortEndDate)
cohortData <- dplyr::left_join(cohortData,cohortDescript, by= c("cohortDefinitionId"="cohortDefinitionId"))
# Event cohort
if(!is.null(eventCohortIds)){
eventCohort <- loadCohort(connectionDetails,
cohortDatabaseSchema,
cohortTable,
eventCohortIds)
eventCohort <- dplyr::left_join(eventCohort,cohortDescript, by= c("cohortDefinitionId"="cohortDefinitionId"))
eventCohort <- eventCohort %>% subset(subjectId %in% unique(cohortData$subjectId))
if(!is.null(conditionCohortIds)){eventCohort <- eventCohort %>% subset(subjectId %in% conditionCohort$subjectId)}
colnames(eventCohort) <- colnames(cohortData)
eventCohort$cohortStartDate <- as.Date(eventCohort$cohortStartDate)
eventCohort$cohortEndDate <- as.Date(eventCohort$cohortEndDate)
}
# Ignore the change to same regimen
cohortData <- cohortData %>% arrange(subjectId,cohortStartDate) %>% group_by(subjectId) %>% mutate(lagCDI = lag(cohortName)) %>% subset(is.na(lagCDI)|lagCDI != cohortName) %>% select(-lagCDI)
cohortData <- as.data.frame(cohortData)
# Bind event and target cohort, Ignore duplicated event records
if(!is.null(eventCohortIds)){
eventAndTarget <- rbind(cohortData,eventCohort) %>% arrange(subjectId,cohortStartDate) %>% group_by(subjectId) %>% mutate(lagCDI = lag(cohortName)) %>% subset(is.na(lagCDI)|lagCDI != cohortName) %>% select(-lagCDI) %>% ungroup()
eventAndTarget$cohortName <- as.character(eventAndTarget$cohortName)
eventAndTarget <- as.data.frame(eventAndTarget)}else{
eventAndTarget <- cohortData %>% arrange(subjectId,cohortStartDate) %>% group_by(subjectId) %>% mutate(lagCDI = lag(cohortName)) %>% subset(is.na(lagCDI)|lagCDI != cohortName) %>% select(-lagCDI) %>% ungroup()
eventAndTarget$cohortName <- as.character(eventAndTarget$cohortName)
eventAndTarget <- as.data.frame(eventAndTarget)
}
# If treatment apart from each other less than combinationWindow, collapse using '+'
collapsedRecords <- data.table::rbindlist(lapply(unique(eventAndTarget$subjectId),function(targetSubjectId){
reconstructedRecords <- data.frame()
targeteventAndTarget <- eventAndTarget %>% subset(subjectId == targetSubjectId)
reconstructedRecords <- rbind(reconstructedRecords,targeteventAndTarget[1,])
if(nrow(targeteventAndTarget) >= 2){
for(x in 2:nrow(targeteventAndTarget)){
if(as.integer(targeteventAndTarget[x,3]-reconstructedRecords[nrow(reconstructedRecords),3])>combinationWindow){
reconstructedRecords <-rbind(reconstructedRecords,targeteventAndTarget[x,])}else{sortNames<-sort(c(targeteventAndTarget[x,5],reconstructedRecords[nrow(reconstructedRecords),5]))
reconstructedRecords[nrow(reconstructedRecords),5]<-paste0(sortNames,collapse = '+')
}
}
}
return(reconstructedRecords)
}
)
)
# Set minimum regimen change count
eventAndTarget <- collapsedRecords
minimunIndexId <- unique(eventAndTarget %>% arrange(subjectId,cohortStartDate) %>% group_by(subjectId) %>% mutate(line = row_number()) %>% subset(line >= minimumPathLength) %>% select(subjectId) %>% ungroup())
eventAndTarget <- eventAndTarget %>% subset(subjectId %in% minimunIndexId$subjectId) %>% arrange(subjectId,cohortStartDate)
# Maximum path length in graph
eventAndTarget <- eventAndTarget %>% group_by(subjectId) %>% arrange(subjectId,cohortStartDate) %>% mutate(rowNumber = row_number()) %>% subset(rowNumber <= maximumPathLength) %>% select(subjectId,cohortName,rowNumber) %>% mutate(nameOfConcept = paste0(rowNumber,'_',cohortName)) %>% ungroup()
# Padding only first line node
eventAndTarget <- rbind(eventAndTarget,eventAndTarget %>% subset(rowNumber == 1) %>% subset(!subjectId %in% (eventAndTarget %>% subset(rowNumber == 2))$subjectId) %>% mutate(rowNumber = 2, cohortName = "received only first line", nameOfConcept = "2_received only first line")) %>% arrange(subjectId, rowNumber)
rawData <- generateLinksAndNodes(targetData = eventAndTarget,
minimumCellCount,
maximumPathLength,
saveFile,
outputFolderPath,
outputFileTitle)
# 4. Event incidence in each cycle
# 5. Event onset timing
return(rawData)
}
#' @export
generateLinksAndNodes <- function(targetData,
minimumCellCount,
maximumPathLength,
saveFile,
outputFolderPath,
outputFileTitle){
# Exclude patients until minimum nodes cell count under criteria
nodePatientNo <- targetData %>% group_by(nameOfConcept) %>% summarise(n=n())
while(min(nodePatientNo$n) < minimumCellCount){
nodePatientNo <- targetData %>% group_by(nameOfConcept) %>% summarise(n=n())
targetData <- targetData %>% subset(!subjectId %in% (targetData %>% subset(nameOfConcept %in% (nodePatientNo %>% subset(n < minimumCellCount))$nameOfConcept))$subjectId)
}
# Nodes
treatmentRatio <- data.table::rbindlist(lapply(1:maximumPathLength,function(x){
result <- targetData %>% subset(rowNumber==x) %>% group_by(nameOfConcept) %>% summarise(n=n()) %>% mutate(ratio=round(n/sum(n)*100,1))
return(result)}
)
)
# Label
label <- unique(targetData %>% select(cohortName,nameOfConcept) %>% arrange(nameOfConcept))
label <- label %>% mutate(num = seq(from = 0,length.out = nrow(label)))
label <- dplyr::left_join(treatmentRatio,label,by=c("nameOfConcept"="nameOfConcept")) %>% mutate(name = paste0(cohortName,' (n=',n,', ',ratio,'%)'))
label <- label %>% mutate(num = seq(from = 0, length.out = nrow(label)))
nodes <- label %>% select(name,cohortName)
colnames(nodes) <- c('name','group')
nodes <- data.frame(nodes)
# Pivot table
pivotRecords <- reshape2::dcast(targetData,subjectId ~ rowNumber, value.var="nameOfConcept")
# Write pathway table
pathwayRecords <- lapply(1 : nrow(pivotRecords), function(x){
pathway <- paste0(stringr::str_sub((pivotRecords %>% select(-subjectId))[x,],start = 3), collapse = '-')
return(pathway)
}
)
pathway <- data.frame(unlist(pathwayRecords))
colnames(pathway) <- 'pathway'
pathwayTable <- pathway %>% group_by(pathway) %>% summarise(n=n()) %>% arrange(desc(n)) %>% mutate(percentLabel = round(n/sum(n)*100,2))
sumPathway <- sum(pathwayTable$n)
pathwayTable <- pathwayTable %>% mutate(label = paste0(n,' (',percentLabel,'%)')) %>% select(pathway,label)
colnames(pathwayTable) <- c('Pathway',paste0('N = ',sumPathway))
pathwayTable <- as.data.frame(pathwayTable)
if(saveFile){
fileNamePathway <- paste0(outputFileTitle,'_','pathway.csv')
write.csv(pathwayTable, file.path(outputFolderPath, fileNamePathway),row.names = F)
}
# Link
link <- data.table::rbindlist(lapply(2:max(targetData$rowNumber),function(x){
source <- pivotRecords[,x]
target <- pivotRecords[,x+1]
link <-data.frame(source,target)
link$source<-as.character(link$source)
link$target<-as.character(link$target)
link<-na.omit(link)
return(link)
}))
link$source <- as.character(link$source)
link$target <- as.character(link$target)
link <- link %>% select(source,target)%>% group_by(source,target)%>% summarise(n=n()) %>% ungroup()
group <- dplyr::left_join(link,label,by = c("source" = "nameOfConcept")) %>% select(cohortName)
source <- dplyr::left_join(link,label,by = c("source" = "nameOfConcept")) %>% select(num)
target <- dplyr::left_join(link,label,by = c("target" = "nameOfConcept")) %>% select(num)
freq <- link %>% select(n)
links <- data.frame(group,source,target,freq)
links <- na.omit(links)
colnames(links) <- c('group','source','target','value')
links$source <- as.integer(links$source)
links$target <- as.integer(links$target)
links$value <- as.numeric(links$value)
# Write raw data
treatment <- list(nodes=nodes,links=links,pathways = pathwayTable)
if(saveFile){
fileNameNodes <- paste0(outputFileTitle,'_','SankeyNodes.csv')
write.csv(nodes, file.path(outputFolderPath, fileNameNodes),row.names = F)
fileNameLinks <- paste0(outputFileTitle,'_','SankeyLinks.csv')
write.csv(links, file.path(outputFolderPath, fileNameLinks),row.names = F)
}
return(treatment)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.