#' A class for careflow mining
#'
#' @description This is an implementation of the First Order Markov Model (FOMM) for Process Mining issues.
#' This class provides a minimal set of methods to handle with the FOMM model:
#' In order to better undestand the use of such methods, please visit: www.pminer.info
#'
#' The consturctor admit the following parameters: is a boolean which indicates if the autoloops have to be admitted, while 'threshold' is the minimum value that a probability should have to do not be set to zero, in the transition matrix.
#' @export
careflowMining<-function( parameters.list = list() ) {
parameters<-list()
evLog <- ''
res <- ''
allHistEnrich <- ''
time.unit.global <- ''
#===========================================================
# loadDataset
#===========================================================
loadDataset<-function( dataList , end.date , format.end.date) {
for (pat in names(dataList$pat.process)){
newdate <- as.Date(as.character(dataList$pat.process[[pat]][,end.date]), format.end.date)
dataList$pat.process[[pat]][,end.date] <- format(newdate, "%d/%m/%Y %H:%M:%S")
names(dataList$pat.process[[pat]])[which(names(dataList$pat.process[[pat]])==dataList$csv.EVENTName)] <- "EVENT"
names(dataList$pat.process[[pat]])[which(names(dataList$pat.process[[pat]])==dataList$csv.dateColumnName)] <- "DATE_INI"
if(dataList$csv.dateColumnName != end.date){
names(dataList$pat.process[[pat]])[which(names(dataList$pat.process[[pat]])==end.date)] <- "DATE_END"
}
else {
dataList$pat.process[[pat]][,"DATE_END"] <- dataList$pat.process[[pat]][,"DATE_INI"]
}
}
evLog <<- dataList$pat.process
}
#===========================================================
# trainModel
#===========================================================
trainModel<-function(time.unit = "days") {
######## Pre-processing
# build mat_data (flattened event log without dates)
time.unit.global <<- time.unit
mat_data_names <- c("ID",seq(1:parameters$max.length))
mat_data <- as.data.frame(matrix(data = NA, nrow = length(evLog), ncol = 1 + parameters$max.length))
names(mat_data) <- mat_data_names
mat_data$ID <- names(evLog)
for(pat in names(evLog)){
mat_data[which(mat_data$ID==pat),2:ncol(mat_data)] <- evLog[[pat]][,"EVENT"][1:parameters$max.length]
}
# build mat_date_start
mat_date_start <- as.data.frame(matrix(data = NA, nrow = length(evLog), ncol = 1 + parameters$max.length))
names(mat_date_start) <- mat_data_names
mat_date_start$ID <- names(evLog)
for(pat in names(evLog)){
mat_date_start[which(mat_date_start$ID==pat),2:ncol(mat_data)] <- paste(as.Date(evLog[[pat]][,"DATE_INI"][1:parameters$max.length], format="%d/%m/%Y %H:%M:%S"))
}
# build mat_date_end
mat_date_end <- as.data.frame(matrix(data = NA, nrow = length(evLog), ncol = 1 + parameters$max.length))
names(mat_date_end) <- mat_data_names
mat_date_end$ID <- names(evLog)
for(pat in names(evLog)){
mat_date_end[which(mat_date_end$ID==pat),2:ncol(mat_data)] <- paste(as.Date(evLog[[pat]][,"DATE_END"][1:parameters$max.length], format="%d/%m/%Y %H:%M:%S"))
}
mat <- list("mat_data"=mat_data,"mat_date_start"=mat_date_start,"mat_date_end"=mat_date_end)
######## End Pre-processing
######## Begin Mining
selected<-selection(mat$mat_data,parameters$support.th)
historyFE<-createhistFIRSTTIME(selected)
#if (ncol(historyFE)>=max.length){print("NO HISTORIES, TRY LOWER SUPPORT")}
# histories<-list()
# EVENTdurations<-list()
# ARCHESdurations<-list()
# IDcounts<-list()
# IDss<-list()
results<-list()
for(j in 1:nrow(historyFE)) {
history<-data.frame()
EVENTduration<-data.frame()
ARCHESduration<-data.frame()
IDcount<-data.frame()
IDs<-list()
results[[j]]<-list()
#initialize history
history<-subset(historyFE,historyFE$EVENT_1==historyFE[j,1])
matHIST<-data.frame()
matHIST<-subset(mat$mat_data, mat$mat_data[,2] == paste(history$EVENT_1))
#Patients count
tabPts <- as.data.frame(table(matHIST[,2]))
IDcount<-data.frame(EVENT_1=tabPts$Freq)
#Initialize the list with all the patietns "%d/%m/%Y %H:%M:%S"
IDs<-list(matHIST[,1])
#initialize date start and end
matHIST_dateStart<-subset(mat$mat_date_start, mat$mat_data[,2] == paste(history$EVENT_1))
matHIST_dateEnd<-subset(mat$mat_date_end, mat$mat_data[,2] == paste(history$EVENT_1))
#initialize events - take the first event of the history, compute event duration
EVENTduration<-data.frame(EVENT_1= mean( as.integer(difftime(matHIST_dateEnd[,2],matHIST_dateStart[,2], units = time.unit ) )))
#initialize arches - take history that end after the first event, compute arches duration
ARCHESduration<-data.frame(EVENT_1= mean( as.integer(difftime(matHIST_dateStart[is.na(matHIST[,3]),2] ,matHIST_dateEnd[is.na(matHIST[,3]),2] , units = time.unit) )))
for (times in 1:(parameters$max.length-1)) { #here cycle through EVENT
# if(times>1){
# #browser()
# #current_event<-history[,length(history)]
# #if(length(unique(current_event))==1 && unique(current_event)=="OUT"){break}
# if(length(unique(event))==1 && unique(event)=="OUT") break
# }
#if (times==3) browser()
#Add OUT event
history<-cbind(history,as.character("OUT"))
names(history)[ncol(history)]<-paste0("EVENT_",ncol(history))
history <- data.frame(lapply(history, as.character), stringsAsFactors=FALSE)
#Add zero into the out event
IDcount<-cbind(IDcount,0)
names(IDcount)[ncol(IDcount)]<-paste0("EVENT_",ncol(IDcount))
#Add last value corresponding to out event (-1)
EVENTduration<-cbind(EVENTduration,-1)
names(EVENTduration)[ncol(EVENTduration)]<-paste0("EVENT_",ncol(EVENTduration))
#Add last value corresponding to out event (-1)
ARCHESduration<-cbind(ARCHESduration,-1)
names(ARCHESduration)[ncol(ARCHESduration)]<-paste0("EVENT_",ncol(ARCHESduration))
#At each time take only patients that verifies frequent histories
PtsToTake <- c()
#========EVENT SELECTION
event<-history[,length(history)-1] #consider the last events of previous history
for(y in 1:length(event)){
if (event[y]!="OUT"){
#CHECK to have at least one column to process in the computenewMat function
if (ncol(matHIST) <= 2) {break}
else {
#update event and dates matrix
matNEW<-computeNewMat(event[y],matHIST)
matNEW_dateStart<-computeNewMatDates(matHIST_dateStart,matNEW$ID)
matNEW_dateEnd<-computeNewMatDates(matHIST_dateEnd,matNEW$ID)
#=========select events to ADD
selected<-selection(matNEW,parameters$support.th)
#
if(length(selected)==0) {}
else{
#Count Patients
PtsCompute <- computeIDcount(event[y], selected, history,IDcount,IDs,matNEW)
IDcountNew<-PtsCompute[[1]]
IDcount<-rbind(IDcount,IDcountNew)
#Pts Ids
IDs<-PtsCompute[[2]]
#Pts to take
PtsToTake <- c(PtsToTake,PtsCompute[[3]])
#Compute Event duration
EVENTdurationNew<-computeEventDuration(event[y],selected,history,EVENTduration,matNEW,matNEW_dateStart,matNEW_dateEnd)
EVENTduration<-rbind(EVENTduration,EVENTdurationNew)
#Compute Arches duration
ARCHESdurationNew<-computeArchesDuration(event[y],selected,history,ARCHESduration,matHIST,matHIST_dateStart,matHIST_dateEnd)
ARCHESduration<-rbind(ARCHESduration,ARCHESdurationNew)
#Create new history
historynew<-createhist(event[y],selected,history)
history<-rbind(history,historynew)
#REMOVE DUPLICATES
INDEX <- !duplicated(history)
history<-history[INDEX, ]
IDcount<-IDcount[INDEX, ]
EVENTduration<-EVENTduration[INDEX, ]
ARCHESduration<-ARCHESduration[INDEX, ]
}
}
}
}
if(is.data.frame(matHIST)==FALSE){break} #check if there are still column to process
else {
matHIST<-matHIST[matHIST$ID %in% unique(PtsToTake),-2]
matHIST_dateEnd<-matHIST_dateEnd[matHIST_dateEnd$ID %in% unique(PtsToTake),-2]
matHIST_dateStart<-matHIST_dateStart[matHIST_dateStart$ID %in% unique(PtsToTake),-2]
}
}
# histories[[j]]<-history
# IDcounts[[j]]<-IDcount
# IDss[[j]]<-IDs
# EVENTdurations[[j]]<-EVENTduration
# ARCHESdurations[[j]]<-ARCHESduration
#browser()
results[[j]] <- list("history"=history,"EVENTduration"=EVENTduration,"ARCHESduration"=ARCHESduration,
"IDcounts"=IDcount,"IDss"=IDs)
}
res <<- results
######## End Mining
#historyData <<- getHistoryData(results)
}
#===========================================================
# getModel
#===========================================================
getModel <- function(){
return(res)
}
#===========================================================
# clinicalEnrichment
#===========================================================
enrichModel <-function( clinicalDataFrame ,clinicalDataFile="",sep=";",header=T,dec = ".", value = "", ID="", date="",date.format="") {
historyData <- getHistoryData()
if (clinicalDataFile!=""){
clinicalData <- read.table(file = clinicalDataFile, sep = sep, dec = dec, header=header,stringsAsFactors = FALSE)
}
else {
clinicalData <- clinicalDataFrame
}
colnames(clinicalData)[which(colnames(clinicalData)==ID)] <- "ID"
if (date.format!= "%d/%m/%Y %H:%M:%S"){
newdate <- as.Date(as.character(clinicalData[,date]), date.format)
clinicalData[,date] <- format(newdate, "%d/%m/%Y %H:%M:%S")
}
clinicalData$data <- clinicalData[[date]]
histEnrich <- list()
for(hist in 1:length(res)){
actualIds <- historyData$actualIds[[hist]]
arcEnrich <- as.data.frame(matrix(data = NA,nrow = nrow(res[[hist]]$history),ncol= ncol(res[[hist]]$history) ))
arcEnrich.shadow <- as.data.frame(matrix(data = NA,nrow = nrow(res[[hist]]$history),ncol= ncol(res[[hist]]$history) ))
arcEnrich.npat <- as.data.frame(matrix(data = NA,nrow = nrow(res[[hist]]$history),ncol= ncol(res[[hist]]$history) ))
arcEnrich.nonavalues <- as.data.frame(matrix(data = NA,nrow = nrow(res[[hist]]$history),ncol= ncol(res[[hist]]$history) ))
arcEnrich.navalues <- as.data.frame(matrix(data = NA,nrow = nrow(res[[hist]]$history),ncol= ncol(res[[hist]]$history) ))
names(arcEnrich) <- names(res[[hist]]$history)
history <- res[[hist]]$history
for (j in 1:nrow(history)){
#browser()
tmp <- actualIds[[j]]
clinDataTmp <- clinicalData[which(clinicalData$ID %in% tmp),]
ll <- length(history[j,][which(history[j,]!="OUT")])
if(history[j,dim(history)[2]] == "OUT"){
arcEnrich[j,(ll+1):dim(arcEnrich)[2]] <- NA
arcEnrich.shadow[j,(ll+1):dim(arcEnrich.shadow)[2]] <- "OUT"
}
meanValue <- numeric()
if(ll > 1) {
#browser()
for(ev in 1:(ll-1)){
datesDF <- data.frame(ID=names(evLog[tmp]),date_ini1=NA,date_ini2=NA,stringsAsFactors = FALSE)
for(pat in names(evLog[tmp])){
#dates <- evLog[tmp][[pat]]$DATE_INI[ev:(ev+1)]
datesDF[which(datesDF$ID==pat),2]<-evLog[tmp][[pat]][,"DATE_INI"][ev]
datesDF[which(datesDF$ID==pat),3]<-evLog[tmp][[pat]][,"DATE_INI"][ev+1]
}
allThisArchData <- merge(clinDataTmp, datesDF, by=c("ID"))
clinSubsetByDates <- allThisArchData[which(allThisArchData$data > as.Date(as.character(allThisArchData$date_ini1),format = "%d/%m/%Y") & allThisArchData$data < as.Date(as.character(allThisArchData$date_ini2),format = "%d/%m/%Y")),]
if(length(clinSubsetByDates[,value])!=0){
arcEnrich[j,ev+1] <- mean(clinSubsetByDates[,value],na.rm=T)
arcEnrich.shadow[j,ev+1] <- "ok"
arcEnrich.npat[j,ev+1] <- length(unique(clinSubsetByDates$ID))
arcEnrich.nonavalues[j,ev+1] <- length(clinSubsetByDates[,value])
arcEnrich.navalues[j,ev+1] <- length(which(is.na(clinSubsetByDates[,value])))
}
else {
arcEnrich[j,ev+1] <- NA
arcEnrich.shadow[j,ev+1] <- "no values"
arcEnrich.npat[j,ev+1] <- 0
arcEnrich.nonavalues[j,ev+1] <- 0
arcEnrich.navalues[j,ev+1] <- 0
}
}
}
else {
next
}
}
histEnrich[[hist]] <- list("arcEnrich"=arcEnrich,"arcEnrich.shadow"=arcEnrich.shadow,
"arcEnrich.npat"=arcEnrich.npat, "arcEnrich.nonavalues"=arcEnrich.nonavalues,
"arcEnrich.navalues"=arcEnrich.navalues)
}
allHistEnrich <<- histEnrich
}
#===========================================================
# plotModel
#===========================================================
plot<-function( kindOfOutput ="time") {
graphics <- list()
bb <- getHistoryData()
if(kindOfOutput=="time"){
for(hist in 1:length(res)){
HistMined<-res[[hist]]$history
IDcountsLIST<-res[[hist]]$IDcounts
EVENT.durations<-res[[hist]]$EVENTduration
ARCHES.durations<-res[[hist]]$ARCHESduration
IDsTOTTime <- bb$IDsTOTTime[[hist]]
nodeTemp<-c()
arches<-c()
for (k in 1:nrow(HistMined)){
# Create a simple NODES
for (j in 1:ncol(HistMined)){
if (HistMined[k,j] != "OUT"){
nodeTemp<-c(nodeTemp, paste(j, HistMined[k,j],
"\n","pts.",IDcountsLIST[k,j],
"\n",paste("event duration:",time.unit.global),trunc(EVENT.durations[k,j]) ))
arches<-c(arches, trunc(ARCHES.durations[k,j]))
}
else {
if (HistMined[k,j-1] != "OUT" ){
nodeTemp<-c(nodeTemp, paste(k, HistMined[k,j],
"\n","pts.",
IDsTOTTime[k,]$TotPts,
"\n",paste("Total history duration:",time.unit.global),
trunc(IDsTOTTime[k,]$MeanMaxtimesDays)
))
arches<-c(arches, trunc(ARCHES.durations[k,j]))
}}}}
# Create a simple NDF
nodes <-create_node_df(n= length(unique(nodeTemp)), type = "chr", label = unique(nodeTemp))
arches <-create_node_df(n= length((arches)), type = "chr", label = (arches))
arches<-arches[-1,]
aaa<-data.frame( from = nodeTemp[1:length(nodeTemp)-1],to = nodeTemp[2:length(nodeTemp)])
aaa$trans<-arches$label
aaa<-subset(aaa,aaa$trans!="NaN")
aaa<-unique(aaa)
# Create a simple EDF
edges <- create_edge_df(from=match(aaa$from,unique(nodeTemp)),
to = match(aaa$to,unique(nodeTemp)),
rel = "related")
edges$label <-paste(time.unit.global,aaa$trans,sep=":")
edges$label <- ifelse(edges$label ==paste(time.unit.global,"-1",sep=":"),"",edges$label)
graph_tmp <- create_graph( nodes_df = nodes,
edges_df = edges)
graph_tmp$global_attrs[1,"theme"]<- "neato"
graph_tmp$global_attrs[1,"value"] <- "dot"
#browser()
#render_graph(graph_tmp)
graphics[[hist]] <- graph_tmp
}
}
else if(kindOfOutput=="clinical"){
for(hist in 1:length(res)){
HistMined<-res[[hist]]$history
IDcountsLIST<-res[[hist]]$IDcounts
EVENT.durations<-res[[hist]]$EVENTduration
ARCHES.enrichment <- allHistEnrich[[hist]]$arcEnrich
ARCHES.enrichment.shadow <- allHistEnrich[[hist]]$arcEnrich.shadow
ARCHES.enrichment.npat <- allHistEnrich[[hist]]$arcEnrich.npat
ARCHES.enrichment.nonavalues <- allHistEnrich[[hist]]$arcEnrich.nonavalues
ARCHES.enrichment.navalues <- allHistEnrich[[hist]]$arcEnrich.navalues
IDsTOTTime <- bb$IDsTOTTime[[hist]]
nodeTemp<-c()
enrich <- c()
enrich.shadow <- c()
enrich.npat <- c()
enrich.nonavalues <- c()
enrich.navalues <- c()
ARCHES.enrichment[,1] <- NaN
single.ARCHES.enrichment <- as.data.frame((matrix(data = NA,nrow = nrow(ARCHES.enrichment),
ncol =ncol(ARCHES.enrichment) )))
single.ARCHES.enrichment.npat <- as.data.frame((matrix(data = NA,nrow = nrow(ARCHES.enrichment.npat),
ncol =ncol(ARCHES.enrichment.npat) )))
single.ARCHES.enrichment.nonavalues <- as.data.frame((matrix(data = NA,nrow = nrow(ARCHES.enrichment.nonavalues),
ncol =ncol(ARCHES.enrichment.nonavalues) )))
single.ARCHES.enrichment.navalues <- as.data.frame((matrix(data = NA,nrow = nrow(ARCHES.enrichment.navalues),
ncol =ncol(ARCHES.enrichment.navalues) )))
# Average on different enrichment groups on single arch (those patients who make the transition and might split on the next arch)
for (ii in 2:ncol(HistMined)){
if(length(which(duplicated(HistMined[,1:ii]) | duplicated(HistMined[,1:ii], fromLast = TRUE)))!=0){
unici <- unique(HistMined[,1:ii][which(duplicated(HistMined[,1:ii]) | duplicated(HistMined[,1:ii], fromLast = TRUE)),])
for(zz in 1:nrow(unici)){
arches.toaverage <- numeric()
index.toget <- numeric()
pat.tosum <- numeric()
nonavalues.tosum <- numeric()
navalues.tosum <- numeric()
for (kk in 1:nrow(HistMined)){
if(all(HistMined[kk,1:ii]==unici[zz,])){
arches.toaverage[kk] <- ARCHES.enrichment[kk,ii]
pat.tosum[kk] <- ARCHES.enrichment.npat[kk,ii]
nonavalues.tosum[kk] <- ARCHES.enrichment.nonavalues[kk,ii]
navalues.tosum[kk] <- ARCHES.enrichment.navalues[kk,ii]
index.toget[kk] <- kk
}
else{
next
}
}
index.toget <- index.toget[which(!is.na(index.toget))]
single.ARCHES.enrichment[index.toget,ii] <- mean(arches.toaverage,na.rm = T)
single.ARCHES.enrichment.npat[index.toget,ii] <- sum(pat.tosum,na.rm = T)
single.ARCHES.enrichment.nonavalues[index.toget,ii] <- sum(nonavalues.tosum,na.rm = T)
single.ARCHES.enrichment.navalues[index.toget,ii] <- sum(navalues.tosum,na.rm = T)
}
}
else {
single.ARCHES.enrichment[,ii] <- ARCHES.enrichment[,ii]
single.ARCHES.enrichment.npat[,ii] <- ARCHES.enrichment.npat[,ii]
single.ARCHES.enrichment.nonavalues[,ii] <- ARCHES.enrichment.nonavalues[,ii]
single.ARCHES.enrichment.navalues[,ii] <- ARCHES.enrichment.navalues[,ii]
}
}
#update arches enrichment shadow based on previous step (no values single arch is ok when parallel to a valued one)
for (n in 1:ncol(ARCHES.enrichment.shadow)){
noval.index <- which(ARCHES.enrichment.shadow[,n] == "no values" & !is.na(single.ARCHES.enrichment[,n]))
ARCHES.enrichment.shadow[noval.index,n]<- "ok"
}
#Build nodes and edges
for (k in 1:nrow(HistMined)){
# Create a simple NODES
for (j in 1:ncol(HistMined)){
if (HistMined[k,j] != "OUT"){
nodeTemp<-c(nodeTemp, paste(j, HistMined[k,j],
"\n","pts.",IDcountsLIST[k,j],
"\n","event duration",trunc(EVENT.durations[k,j]) ))
enrich <- c(enrich, trunc(single.ARCHES.enrichment[k,j]))
enrich.shadow <- c(enrich.shadow, ARCHES.enrichment.shadow[k,j])
enrich.npat <- c(enrich.npat, single.ARCHES.enrichment.npat[k,j])
enrich.nonavalues <- c(enrich.nonavalues, single.ARCHES.enrichment.nonavalues[k,j])
enrich.navalues <- c(enrich.navalues, single.ARCHES.enrichment.navalues[k,j])
}
else {
if (HistMined[k,j-1] != "OUT" ){
nodeTemp<-c(nodeTemp, paste(k, HistMined[k,j],
"\n","pts.",
IDsTOTTime[k,]$TotPts,
"\n","Total history duration",
trunc(IDsTOTTime[k,]$MeanMaxtimesDays)
))
enrich <- c(enrich, trunc(single.ARCHES.enrichment[k,j]))
enrich.shadow <- c(enrich.shadow, ARCHES.enrichment.shadow[k,j])
enrich.npat <- c(enrich.npat, single.ARCHES.enrichment.npat[k,j])
enrich.nonavalues <- c(enrich.nonavalues, single.ARCHES.enrichment.nonavalues[k,j])
enrich.navalues <- c(enrich.navalues, single.ARCHES.enrichment.navalues[k,j])
}}}}
# Create a NDF
nodes <-create_node_df(n= length(unique(nodeTemp)), type = "chr", label = unique(nodeTemp))
#add al the things we need for output
enrich <-create_node_df(n= length(enrich), type = "chr", label = enrich)
enrich.shadow <-create_node_df(n= length(enrich.shadow), type = "chr", label = enrich.shadow)
enrich.npat <-create_node_df(n= length(enrich.npat), type = "chr", label = enrich.npat)
enrich.nonavalues <-create_node_df(n= length(enrich.nonavalues), type = "chr", label = enrich.nonavalues)
enrich.navalues <-create_node_df(n= length(enrich.navalues), type = "chr", label = enrich.navalues)
enrich<-enrich[-1,]
enrich.shadow<-enrich.shadow[-1,]
enrich.npat<-enrich.npat[-1,]
enrich.nonavalues<-enrich.nonavalues[-1,]
enrich.navalues<-enrich.navalues[-1,]
aaa<-data.frame( from = nodeTemp[1:length(nodeTemp)-1],to = nodeTemp[2:length(nodeTemp)])
aaa$trans<-enrich$label
aaa$trans.shadow <-enrich.shadow$label
aaa$trans.npat <-enrich.npat$label
aaa$trans.nonavalues <-enrich.nonavalues$label
aaa$trans.navalues <-enrich.navalues$label
aaa<-aaa[which(!is.na(aaa$trans.shadow)),]
#aaa <- subset(aaa, !duplicated(aaa[,1:2]))
aaa <- unique(aaa)
# Create a simple EDF
edges <- create_edge_df(from=match(aaa$from,unique(nodeTemp)),
to = match(aaa$to,unique(nodeTemp)),
rel = "related")
edges$label <-paste("mean val.",aaa$trans,"\n",
"pts.",aaa$trans.npat,"\n",
"obs.",aaa$trans.nonavalues,"\n",
"NAs",aaa$trans.navalues)
edges$label <- ifelse(aaa$trans.shadow =="OUT","",edges$label)
edges$label <- ifelse(aaa$trans.shadow =="no values","no values",edges$label)
graph_tmp <- create_graph( nodes_df = nodes,
edges_df = edges)
graph_tmp$global_attrs[1,"value"] <- "dot"
#browser()
#render_graph(graph_tmp)
graphics[[hist]] <- graph_tmp
}
}
return(graphics)
}
#===========================================================
#===================BEGIN PRIVATE FUNCTIONS=================
#===========================================================
selection <- function(mat,support.th){
#check how many time an event is the first event of histories
tab <- as.data.frame(table(mat[,2]))
#Compute the support (supp) for each first event
tab$supp=tab$Freq/nrow(mat)
#select the first events where the threshold is verified
selected<-tab[which(tab$supp >= support.th ),1]
return(selected)
}
createhistFIRSTTIME <- function(selected){
historyFE<- data.frame()
#create histories
for (k in 1:length(selected)){
historyFE[k,1]<-paste(selected[k])
}
names(historyFE)[1]<-paste("EVENT_1")
return(historyFE)
}
computeNewMat<- function(event,mat){
#select only histories starting with first frequent event
mat_new<-mat[which(mat[,2] == event),]
#delete first event column
mat_new<-mat_new[,-2]
out<-subset(mat_new, is.na(mat_new[,2])==T)
mat_new<-subset(mat_new, is.na(mat_new[,2])==F)
return(mat_new)
}
computeNewMatDates<- function(mat,IDs){
#update date matrix
mat_NEW<-mat[which(mat$ID %in% IDs),]
mat_NEW<-mat_NEW[,-2]
return(mat_NEW)
}
createhist <- function(event, selected, history){
totake<-history[,ncol(history)-1]==event
toadd<-c(history[totake,1:ncol(history)-1])
#create histories
for (k in 1:length(selected)){
addtohist<-c(toadd,paste(selected[k]))
addtohist <- data.frame(lapply(addtohist, as.character), stringsAsFactors=FALSE)
names(addtohist)<-names(history)
history<-rbind(history,addtohist)
}
return(history)
}
computeIDcount <- function(event, selected, history,IDcount,IDs,matNEW){
totakeID<-history[,ncol(history)-1]==event
toaddID<-c(IDcount[totakeID,1:ncol(IDcount)-1])
IdsToTake <- c()
for (k in 1:length(selected)) {
IDcounttoADD<- length(matNEW[which(matNEW[,2] == selected[k]),1])
addtoId<-c(toaddID,IDcounttoADD)
addtoId <- data.frame(lapply(addtoId, as.numeric), stringsAsFactors=FALSE)
names(addtoId)<-names(IDcount)
IDcount<-rbind(IDcount,addtoId)
#ID of patients
IDs[[length(IDs)+1]] <- matNEW[which(matNEW[,2] == selected[k]),1]
#Update Pts to take
IdsToTake <- c(IdsToTake,matNEW[which(matNEW[,2] == selected[k]),1])
}
my_list <- list(IDcount, IDs,IdsToTake)
return(my_list)
}
computeEventDuration <- function(event,selected,history,EVENTduration,matNEW,matNEW_dateStart,matNEW_dateEnd){
totakeE<-history[,ncol(history)-1]==event
toaddE<-c(EVENTduration[totakeE,1:ncol(EVENTduration)-1])
for (k in 1:length(selected)){
EVNTini<-subset(matNEW_dateStart, matNEW[,2]==selected[k])
EVNTend<-subset(matNEW_dateEnd, matNEW[,2]==selected[k])
# EVENTdurationSELECTED<-mean((EVNTend[,2] - EVNTini[,2]), na.rm = TRUE)
EVENTdurationSELECTED<-mean( difftime(EVNTend[,2] ,EVNTini[,2], units = time.unit.global ) , na.rm = TRUE)
toaddevnt<-c(toaddE,EVENTdurationSELECTED)
toaddevnt <- data.frame(lapply(toaddevnt, as.numeric), stringsAsFactors=FALSE)
names(toaddevnt)<-names(EVENTduration)
EVENTduration<-rbind(EVENTduration,toaddevnt)
}
return(EVENTduration)
}
computeArchesDuration <- function(event,selected,history,ARCHESduration,matHIST,matHIST_dateStart,matHIST_dateEnd){
totakeA<-history[,ncol(history)-1]==event
toaddA<-c(ARCHESduration[totakeA,1:ncol(ARCHESduration)-1])
for (k in 1:length(selected)){
Archesini<-subset(matHIST_dateStart, matHIST[,2]==event & matHIST[,3]==selected[k])
Archesend<-subset(matHIST_dateEnd, matHIST[,2]==event & matHIST[,3]==selected[k])
# ArchesSELECTED<-mean((Archesini[,3] - Archesend[,2]),na.rm = TRUE)
ArchesSELECTED<-mean( as.integer(difftime(Archesini[,3] , Archesend[,2], units = time.unit.global) ) ,na.rm = TRUE)
toaddevnt<-c(toaddA,ArchesSELECTED)
toaddevnt <- data.frame(lapply(toaddevnt, as.numeric), stringsAsFactors=FALSE)
names(toaddevnt)<-names(ARCHESduration)
ARCHESduration<-rbind(ARCHESduration,toaddevnt)
}
return(ARCHESduration)
}
getHistoryData <- function() {
HistMined <- list()
IDsTOTTime <- list()
actualIds <- list()
for (hist in 1:length(res)){
HistMined[[hist]]<-res[[hist]]$history
IDs<-res[[hist]]$IDss
actualIds[[hist]] <- list()
#Number of Events in the History
LengthHist<-c()
for (j in 1:nrow(HistMined[[hist]])){
LengthHistTMP<-length(HistMined[[hist]][j,HistMined[[hist]][j,]!="OUT"])
LengthHist<-c(LengthHist,LengthHistTMP)
}
HistMined[[hist]]$LengthHistEvnt<-LengthHist
alltheID<-unique(unlist(IDs))
HistMined[[hist]]$TotPts<-NA
HistMined[[hist]]$MeanMaxtimes<-NA
for (j in 1:nrow(HistMined[[hist]])){
#Pts in the history but no in the those mined at the following iteration (hisotry lenght plus 1)
IntheHist <-unique(unlist(IDs[[j]]))
HistToTakeLength<- as.integer(HistMined[[hist]]$LengthHistEvnt[j]+1)
InOtherHist<- unique(unlist( IDs[HistMined[[hist]]$LengthHistEvnt == HistToTakeLength]))
IDDDSSS<-alltheID[alltheID %in% IntheHist]
IDDDSSS<-IDDDSSS[!IDDDSSS %in% InOtherHist]
HistMined[[hist]]$TotPts[j]<-length(IDDDSSS)
if(length(IDDDSSS)!=0){
LogID<-evLog[IDDDSSS]
HistMined[[hist]]$MeanMaxtimes[j] <-mean(sapply(1:length(LogID), function(i) max(as.numeric(LogID[[i]]$pMineR.deltaDate))))
actualIds[[hist]][[j]] <- IDDDSSS
}
else{
HistMined[[hist]]$TotPts[j]<- 0
HistMined[[hist]]$MeanMaxtimes[j] <- 0
actualIds[[hist]][[j]] <- NA
}
}
#trnasform in Days
HistMined[[hist]]$MeanMaxtimesDays<- (HistMined[[hist]]$MeanMaxtimes/60)/24
IDsTOTTime[[hist]]<-HistMined[[hist]][,(ncol(HistMined[[hist]])-3):ncol(HistMined[[hist]])]
}
output <- list("IDsTOTTime"=IDsTOTTime,"actualIds"=actualIds)
return(output)
}
#===========================================================
#===================END PRIVATE FUNCTIONS===================
#===========================================================
#===========================================================
# costructor
# E' il costruttore della classe
#===========================================================
costructor<-function( parametersFromInput = list() ) {
parameters <<-parametersFromInput
evLog <<- ''
res <<- ''
allHistEnrich <<- ''
time.unit.global <<- ''
}
#===========================================================
costructor( parametersFromInput = parameters.list);
#===========================================================
return( list(
"loadDataset"=loadDataset,
"trainModel"=trainModel,
"getModel"=getModel,
# "play"=play,
"enrichModel"=enrichModel,
"plot"=plot
) )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.