NIL_STATE <- -1
#' Title
#'
#' @param astate_list
#' @param asigma
#' @param amode
#'
#' @return
#' @export
#'
#' @examples
# state_fn2 <- function(astate_list, asigma, amode = "sequence")
# {
# # pesquisa lista de estados
# lstate_list <- astate_list
# # print(asigma)
# # print(lstate_list)
# Result <- match(asigma, lstate_list)
# if (is.na(Result))
# {
# Result = length(lstate_list) + 1
# # print(Result)
# lstate_list[Result] <- asigma
# eval.parent(substitute(astate_list<-lstate_list))
# }
# return(Result)
# }
#
# Alexandre: funcao removida, trocada pelo match, com parametro de retorno quando NO MATCH
#
# find_state_fn2 <- function(astate_list, asigma, amode = "sequence")
# {
# # pesquisa lista de estados
# Result <- match(asigma, astate_list)
# if (is.na(Result))
# {
# Result = NIL_STATE
# }
# return(Result)
# }
#' funcao de construcao do MTA, recebe o trace, lista de instancias
#' Código fonte original
#'
#' @param events : list of events
#' @param horiz : the horizon
#' @param sel_attributes : list of selected attributes
#'
#' @return
#' @export
#'
#' @examples
build_ats <- function(aevents, horiz, sel_attributes)
{
# mudanças nos nomes das variáveis
# nome no código original novo nome
# asel_traces_lcl events
# sel_traces_lcl events (copy)
# process_instances traces
# trace traceEvents
events <- aevents # necessário para atualizar a lista original no final da execução
generate_log(paste("Starting build_ats for horizon",horiz,"and attributes [",paste( unlist(sel_attributes), collapse=', '),"]"),2)
traces <- distinct_(events, TRACE_ID_COLUMN_NAME)
traces_states <-list(
number=traces,
set_states=list(),
mset_states=list(),
seq_states=list()
)
# listas com os estados de cada modelo
seq_state_list <- list()
set_state_list <- list()
mset_state_list <- list()
generate_log(" inicio dos loops",2)
for(i in 1:nrow(traces))
{
#print(paste("inicio loop principal: ",i))
# busca eventos do caso / search events of the given trace
traceEvents <- events[events$number == traces$number[i],]
#generate_log(paste(" trace",traces$number[i],"with",nrow(traceEvents),"events"),2)
# it must happen due to the backward update to the original events in the end of this function
traceEvents$seq_state_id <- 0
traceEvents$set_state_id <- 0
traceEvents$mset_state_id <- 0
traceEvents$sojourn_set_stc <- 0
traceEvents$sojourn_mset_stc <- 0
traceEvents$sojourn_seq_stc <- 0
activity_fields <- do.call(paste, as.data.frame(traceEvents[,sel_attributes]))
# vector with the timestamps for when the event has started
timestamp_field <- as.vector(traceEvents[, EVENT_TIMESTAMP_COLUMN_NAME])
# Step 1: populate the lists of states for each abstraction
seq_list <- list()
set_list <- list()
mset_list <- list()
for (j in 1:nrow(traceEvents))
{
#print(paste("inicio loop 1: ",j))
# calculate based on the horizon
horiz_index <- max(j - horiz + 1, 1)
# generates state for SEQUENCE abstration
inc_seq <- as.character(activity_fields[horiz_index:j])
seq_list[[j]] <- toString(inc_seq)
# generates state for SET
inc_set <- as.set(inc_seq)
set_list[[j]] <- toString(inc_set)
#stateId <- state_fn2(set_state_list, set_list[[j]])
stateId <- match(set_list[[j]],set_state_list,NIL_STATE)
if ( stateId == NIL_STATE ) {
set_state_list <- append(set_state_list, set_list[[j]])
stateId <- length(set_state_list)
}
# generates state for multi-set
inc_gset <- as.gset(inc_seq)
inc_gset_str <- toString(rbind(gset_support(inc_gset), gset_memberships(inc_gset)))
mset_list[[j]] <- inc_gset_str
if (horiz==1) # horizon == 1, all will be the same
{
traceEvents[j,c("set_state_id","mset_state_id","seq_state_id")] <- stateId
}
else
{
traceEvents[j,"set_state_id"] <- stateId
# now for the multi-set
#traceEvents[j,"mset_state_id"] <- state_fn2(mset_state_list, inc_gset_str)
stateId <- match(mset_list[[j]],mset_state_list,NIL_STATE)
if ( stateId == NIL_STATE ) {
mset_state_list <- append(mset_state_list, mset_list[[j]])
stateId <- length(mset_state_list)
}
traceEvents[j,"mset_state_id"] <- stateId
# and for the sequence
#traceEvents[j,"seq_state_id"] <- state_fn2(seq_state_list, seq_list[[j]])
stateId <- match(seq_list[[j]],seq_state_list,NIL_STATE)
if ( stateId == NIL_STATE ) {
seq_state_list <- append(seq_state_list, seq_list[[j]])
stateId <- length(seq_state_list)
}
traceEvents[j,"seq_state_id"] <- stateId
}
}
# Step 2, calculate the sojourn
calculate_sojourn_v2(traceEvents)
# armazena resultado das transicoes de estado para instancia atual
# modelo de abstracao sequencia
traces_states$seq_states[i] <- list(traceEvents[,"seq_state_id"])
# modelo de abstracao set
traces_states$set_states[i] <- list(traceEvents[,"set_state_id"])
# modelo de abstracao mset
traces_states$mset_states[i] <- list(traceEvents[,"mset_state_id"])
# guardo a estado no evento
events[events$number == traces$number[i],]$seq_state_id <- traceEvents$seq_state_id
events[events$number == traces$number[i],]$set_state_id <- traceEvents$set_state_id
events[events$number == traces$number[i],]$mset_state_id <- traceEvents$mset_state_id
events[events$number == traces$number[i],]$sojourn_set_stc <- traceEvents$sojourn_set_stc
events[events$number == traces$number[i],]$sojourn_mset_stc <- traceEvents$sojourn_mset_stc
events[events$number == traces$number[i],]$sojourn_seq_stc <- traceEvents$sojourn_seq_stc
} # fim i
generate_log(" fim dos loops",2)
# retorna resultado - precisa atualizar a lista de eventos original, para os cálculos
eval.parent(substitute(aevents<-events))
mta_model <- list(traces_states=traces_states,
seq_state_list=seq_state_list,
set_state_list=set_state_list,
mset_state_list=mset_state_list,
horiz=horiz,
sel_attributes=sel_attributes
)
#generate_log("Finalizando build_ats",2)
return(mta_model)
}
#' Cálculo de Sojourn usado na primeira execução completa
#' Não é o código original - foi modificado para seguir mais na linha do que foi
#' proposto pelo van der Aalst no artigo de Time Prediction
#'
#' @param traceEvents
#'
#' @return
#' @export
#'
#' @examples
calculate_sojourn_v1 <- function(aTraceEvents) {
traceEvents <- aTraceEvents
curr_sojourn_set_state <- traceEvents[1,"set_state_id"]
curr_sojourn_mset_state <- traceEvents[1,"mset_state_id"]
curr_sojourn_seq_state <- traceEvents[1,"seq_state_id"]
curr_sojourn_set_stc <- 0
curr_sojourn_mset_stc <- 0
curr_sojourn_seq_stc <- 0
# first event of each case, the sojourn will be always zero
for (j in 2:nrow(traceEvents))
{
elapsed <- traceEvents[j,"elapsed_stc"]
# advances until it reaches a distinct state - for SET
if ( traceEvents[j,"set_state_id"] != curr_sojourn_set_state ) {
curr_sojourn_set_state <- traceEvents[j,"set_state_id"]
traceEvents[j,"sojourn_set_stc"] <- elapsed - curr_sojourn_set_stc
curr_sojourn_set_stc <- elapsed
}
# now for MULTI-SET
if ( traceEvents[j,"mset_state_id"] != curr_sojourn_mset_state ) {
curr_sojourn_mset_state <- traceEvents[j,"mset_state_id"]
traceEvents[j,"sojourn_mset_stc"] <- elapsed - curr_sojourn_mset_stc
curr_sojourn_mset_stc <- elapsed
}
# and now for SEQUENCE
if ( traceEvents[j,"seq_state_id"] != curr_sojourn_seq_state ) {
curr_sojourn_seq_state <- traceEvents[j,"seq_state_id"]
traceEvents[j,"sojourn_seq_stc"] <- elapsed - curr_sojourn_seq_stc
curr_sojourn_seq_stc <- elapsed
}
} # fim j
eval.parent(substitute(aTraceEvents<-traceEvents))
}
#' Cálculo de Sojourn versão 2
#' (na verdade, versão 3 se considerar a 1a como sendo a do código original)
#' Neste caso a alteração é que o resultado do sojourn calculado deverá atualizar a
#' célula j-1 e não a j.
#'
#' @param traceEvents
#'
#' @return
#' @export
#'
#' @examples
calculate_sojourn_v2 <- function(aTraceEvents) {
traceEvents <- aTraceEvents
# gets the first state for each abstraction, as the current
curr_sojourn_set_state <- traceEvents[1,"set_state_id"]
curr_sojourn_mset_state <- traceEvents[1,"mset_state_id"]
curr_sojourn_seq_state <- traceEvents[1,"seq_state_id"]
curr_sojourn_set_stc <- 0
curr_sojourn_mset_stc <- 0
curr_sojourn_seq_stc <- 0
# jump to second event as the update will be on j-1
for (j in 2:nrow(traceEvents))
{
elapsed <- traceEvents[j,"elapsed_stc"]
# advances until it reaches a distinct state - for SET
if ( traceEvents[j,"set_state_id"] != curr_sojourn_set_state ) {
curr_sojourn_set_state <- traceEvents[j,"set_state_id"]
traceEvents[j-1,"sojourn_set_stc"] <- elapsed - curr_sojourn_set_stc
curr_sojourn_set_stc <- elapsed
}
# now for MULTI-SET
if ( traceEvents[j,"mset_state_id"] != curr_sojourn_mset_state ) {
curr_sojourn_mset_state <- traceEvents[j,"mset_state_id"]
traceEvents[j-1,"sojourn_mset_stc"] <- elapsed - curr_sojourn_mset_stc
curr_sojourn_mset_stc <- elapsed
}
# and now for SEQUENCE
if ( traceEvents[j,"seq_state_id"] != curr_sojourn_seq_state ) {
curr_sojourn_seq_state <- traceEvents[j,"seq_state_id"]
traceEvents[j-1,"sojourn_seq_stc"] <- elapsed - curr_sojourn_seq_stc
curr_sojourn_seq_stc <- elapsed
}
} # fim j
eval.parent(substitute(aTraceEvents<-traceEvents))
}
#' funcao de construcao dos MTA recebe o trace, lista de instancias
#'
#' @param asel_traces_lcl
#' @param process_instances
#' @param ats
#'
#' @return
#' @export
#'
#' @examples
#build_prediction <- function(asel_traces_lcl, process_instances, mta_in)
build_prediction <- function(aevents, ats)
{
generate_log("Starting build_prediction",2)
events <- aevents # necessário para atualizar a lista original no final da execução
traces <- distinct_(events, TRACE_ID_COLUMN_NAME)
# process_instances_states <-list(
traces_states <-list(
number=traces,
set_states=list(),
mset_states=list(),
seq_states=list()
)
# listas com os estados de cada modelo
seq_state_list <- ats$seq_state_list
set_state_list <- ats$set_state_list
mset_state_list <- ats$mset_state_list
for(i in 1:nrow(traces))
{
# busca eventos do caso / search events of the given trace
traceEvents <- events[events$number == traces$number[i],]
traceEvents$seq_state_id <- 0
traceEvents$set_state_id <- 0
traceEvents$mset_state_id <- 0
traceEvents$sojourn_set_stc <- 0
traceEvents$sojourn_mset_stc <- 0
traceEvents$sojourn_seq_stc <- 0
# label utilizado na função de eventos (campos do modelo)
activity_fields <- do.call(paste, as.data.frame(traceEvents[,ats$sel_attributes]))
# tempo em que ocorreu o evento
timestamp_field <- as.vector(traceEvents[, EVENT_TIMESTAMP_COLUMN_NAME])
# listas com os modelos de abstração usados para representação dos eventos
seq_list <- list()
set_list <- list()
mset_list <- list()
for (j in 1:nrow(traceEvents))
{
# calculate the horizon
horiz_index <- max(j - ats$horiz + 1, 1)
# generates state for SEQUENCE abstration
inc_seq <- as.character(activity_fields[horiz_index:j])
seq_list[[j]] <- toString(inc_seq)
# generates state for SET
inc_set <- as.set(inc_seq)
set_list[[j]] <- toString(inc_set)
# multi-set (requires package sets)
inc_gset <- as.gset(inc_seq)
inc_gset_str <- toString(rbind(gset_support(inc_gset), gset_memberships(inc_gset)))
mset_list[[j]] <- inc_gset_str
#traceEvents[j,"set_state_id"] <- find_state_fn2(set_state_list, set_list[[j]], "set")
stateId <- match(set_list[[j]], set_state_list, NIL_STATE)
if (ats$horiz==1) # horizonte 1, todos iguais
{
traceEvents[j,c("set_state_id","mset_state_id","seq_state_id")] <- stateId
}
else
{
traceEvents[j,"set_state_id"] <- stateId
# idem com modelo multiset
#traceEvents[j,"mset_state_id"] <- find_state_fn2(mset_state_list, mset_list[[j]] , "mset")
traceEvents[j,"mset_state_id"] <- match(mset_list[[j]], mset_state_list, NIL_STATE)
# modelo de abstracao sequencia
#traceEvents[j,"seq_state_id"] <- find_state_fn2(seq_state_list, seq_list[[j]], "sequence")
traceEvents[j,"seq_state_id"] <- match(seq_list[[j]], seq_state_list, NIL_STATE)
}
}
# Step 2, calculate the sojourn
curr_sojourn_set_state <- traceEvents[1,"set_state_id"]
curr_sojourn_mset_state <- traceEvents[1,"mset_state_id"]
curr_sojourn_seq_state <- traceEvents[1,"seq_state_id"]
curr_sojourn_set_stc <- 0
curr_sojourn_mset_stc <- 0
curr_sojourn_seq_stc <- 0
# first event of each case, the sojourn will be always zero
for (j in 2:nrow(traceEvents))
{
#print(paste("inicio loop 2: ",j))
elapsed <- traceEvents[j,"elapsed_stc"]
# advances until it reaches a distinct state - for SET
if ( traceEvents[j,"set_state_id"] != curr_sojourn_set_state ) {
curr_sojourn_set_state <- traceEvents[j,"set_state_id"]
traceEvents[j,"sojourn_set_stc"] <- elapsed - curr_sojourn_set_stc
curr_sojourn_set_stc <- elapsed
}
# now for MULTI-SET
if ( traceEvents[j,"mset_state_id"] != curr_sojourn_mset_state ) {
curr_sojourn_mset_state <- traceEvents[j,"mset_state_id"]
traceEvents[j,"sojourn_mset_stc"] <- elapsed - curr_sojourn_mset_stc
curr_sojourn_mset_stc <- elapsed
}
# and now for SEQUENCE
if ( traceEvents[j,"seq_state_id"] != curr_sojourn_seq_state ) {
curr_sojourn_seq_state <- traceEvents[j,"seq_state_id"]
traceEvents[j,"sojourn_seq_stc"] <- elapsed - curr_sojourn_seq_stc
curr_sojourn_seq_stc <- elapsed
}
} # fim j
# armazena resultado das transições de estado para instancia atual
# modelo de abstração sequencia
traces_states$seq_states[i] <- list(traceEvents[,"seq_state_id"])
# modelo de abstração set
traces_states$set_states[i] <- list(traceEvents[,"set_state_id"])
# modelo de abstração mset
traces_states$mset_states[i] <- list(traceEvents[,"mset_state_id"])
# guardo a estado no evento
events[events$number == traces$number[i],]$seq_state_id <- traceEvents$seq_state_id
events[events$number == traces$number[i],]$set_state_id <- traceEvents$set_state_id
events[events$number == traces$number[i],]$mset_state_id <- traceEvents$mset_state_id
events[events$number == traces$number[i],]$sojourn_set_stc <- traceEvents$sojourn_set_stc
events[events$number == traces$number[i],]$sojourn_mset_stc <- traceEvents$sojourn_mset_stc
events[events$number == traces$number[i],]$sojourn_seq_stc <- traceEvents$sojourn_seq_stc
} # fim i
# retorna resultado - precisa atualizar a lista de eventos original, para os cálculos
eval.parent(substitute(aevents<-events))
mta_model <- list(traces_states=traces_states,
seq_state_list=ats$seq_state_list,
set_state_list=ats$set_state_list,
mset_state_list=ats$mset_state_list,
horiz=ats$horiz,
sel_attributes=ats$sel_attributes
)
#generate_log("Finalizando build_prediction",2)
return(mta_model)
}
#' DEPRECATED
#'
#' @param lsel_traces_list
#'
#' @return
#' @export
#'
#' @examples
#eval_model_gen_fn <- function(lsel_traces_list)
eval_model_gen_fn <- function(events)
{
summary_pred_stats <- NULL
result <- NULL
# for (sel_trace_ in lsel_traces_list)
for (fold_events in events)
{
#incidentevtlog_anot<- as.data.frame(
events_anot <- as.data.frame(
fold_events[, c("number", "updated_at", "incident_state", "seq_state_id","set_state_id", "mset_state_id",
"sojourn_set_stc","sojourn_mset_stc","sojourn_seq_stc","elapsed_stc", "remaining_stc")]
)
# teste estatistica convertida # Alexandre: o que faz esse código aqui?
events_anot$remaining_stc <- events_anot$remaining_stc
# gerar as contagens e medias por estado
# num_secs <- 1 * 60 * 60 # em horas
# num_secs <- 1 # em segundos
# inc.outlier <- T
# Gera informações de predição por estado
# TODO: Avaliar o calculo retirando os valores de outlier 1.5 * IQR
# predição no primeiro conjunto treinamento - demais validação
if (is.null(summary_pred_stats))
{
# filtrar os valores que sao estados finais pois distorcem a media
incidentevtlog_anot_st <- events_anot[events_anot$remaining_stc > 0,]
summary_set <- gen_summary_pred_fn(incidentevtlog_anot_st, 'set_state_id','remaining_stc')
summary_mset <- gen_summary_pred_fn(incidentevtlog_anot_st, 'mset_state_id','remaining_stc')
summary_seq <- gen_summary_pred_fn(incidentevtlog_anot_st, 'seq_state_id','remaining_stc')
summary_sj_set <- gen_summary_pred_fn(incidentevtlog_anot_st, 'set_state_id','sojourn_set_stc')
summary_sj_mset <- gen_summary_pred_fn(incidentevtlog_anot_st, 'mset_state_id','sojourn_mset_stc')
summary_sj_seq <- gen_summary_pred_fn(incidentevtlog_anot_st, 'seq_state_id','sojourn_seq_stc')
#armazena totais
summary_pred_stats <- list(summary_set, summary_mset, summary_seq,
summary_sj_set, summary_sj_mset, summary_sj_seq)
}
# atualiza predited values media, mediana e desvio padrão
# set
events_anot$remaining_stc_pset_mean <-
summary_set$mean[match(events_anot$set_state_id, summary_set$set_state_id)] +
summary_sj_set$mean[match(events_anot$set_state_id, summary_sj_set$set_state_id)] -
events_anot$sojourn_set_stc
events_anot$remaining_stc_pset_median <-
summary_set$median[match(events_anot$set_state_id, summary_set$set_state_id)] +
summary_sj_set$median[match(events_anot$set_state_id, summary_sj_set$set_state_id)] -
events_anot$sojourn_set_stc
events_anot$remaining_stc_pset_sd <-
summary_set$sd[match(events_anot$set_state_id, summary_set$set_state_id)] +
summary_sj_set$sd[match(events_anot$set_state_id, summary_sj_set$set_state_id)] -
events_anot$sojourn_set_stc
# multi set
events_anot$remaining_stc_pmset_mean <-
summary_mset$mean[match(events_anot$mset_state_id, summary_mset$mset_state_id)] +
summary_sj_mset$mean[match(events_anot$mset_state_id, summary_sj_mset$mset_state_id)] -
events_anot$sojourn_mset_stc
events_anot$remaining_stc_pmset_median <-
summary_mset$median[match(events_anot$mset_state_id, summary_mset$mset_state_id)] +
summary_sj_mset$median[match(events_anot$mset_state_id, summary_sj_mset$mset_state_id)] -
events_anot$sojourn_mset_stc
events_anot$remaining_stc_pmset_sd <-
summary_mset$sd[match(events_anot$mset_state_id, summary_mset$mset_state_id)] +
summary_sj_mset$sd[match(events_anot$mset_state_id, summary_sj_mset$mset_state_id)] -
events_anot$sojourn_mset_stc
# sequence
events_anot$remaining_stc_pseq_mean <-
summary_seq$mean[match(events_anot$seq_state_id, summary_seq$seq_state_id)] +
summary_sj_seq$mean[match(events_anot$seq_state_id, summary_sj_seq$seq_state_id)] -
events_anot$sojourn_seq_stc
events_anot$remaining_stc_pseq_median <-
summary_seq$median[match(events_anot$seq_state_id, summary_seq$seq_state_id)] +
summary_sj_seq$median[match(events_anot$seq_state_id, summary_sj_seq$seq_state_id)] -
events_anot$sojourn_seq_stc
events_anot$remaining_stc_pseq_sd <-
summary_seq$sd[match(events_anot$seq_state_id, summary_seq$seq_state_id)] +
summary_sj_seq$sd[match(events_anot$seq_state_id, summary_sj_seq$seq_state_id)] -
events_anot$sojourn_seq_stc
# remove valorers sem match para calculo erro
incidentevtlog_anot_err <- na.omit(events_anot)
# remove valores dos estados finais Target = 0 que distorcem a média
# valores do ultimo estado serão sempre precisos
incidentevtlog_anot_err <- incidentevtlog_anot_err[incidentevtlog_anot_err$remaining_stc > 0,]
# calculo erro MAPE e RMSPE todos os registros
#MAPE(y_pred, y_true)
mape_val <- c(
MAPE(incidentevtlog_anot_err$remaining_stc_pset_mean, incidentevtlog_anot_err$remaining_stc),
MAPE(incidentevtlog_anot_err$remaining_stc_pset_median, incidentevtlog_anot_err$remaining_stc),
MAPE(incidentevtlog_anot_err$remaining_stc_pset_sd, incidentevtlog_anot_err$remaining_stc),
MAPE(incidentevtlog_anot_err$remaining_stc_pmset_mean, incidentevtlog_anot_err$remaining_stc),
MAPE(incidentevtlog_anot_err$remaining_stc_pmset_median, incidentevtlog_anot_err$remaining_stc),
MAPE(incidentevtlog_anot_err$remaining_stc_pmset_sd, incidentevtlog_anot_err$remaining_stc),
MAPE(incidentevtlog_anot_err$remaining_stc_pseq_mean, incidentevtlog_anot_err$remaining_stc),
MAPE(incidentevtlog_anot_err$remaining_stc_pseq_median, incidentevtlog_anot_err$remaining_stc),
MAPE(incidentevtlog_anot_err$remaining_stc_pseq_sd, incidentevtlog_anot_err$remaining_stc)
)
names(mape_val) <- c(
"val_mape_pset_mean","val_mape_pset_median","val_mape_pset_sd",
"val_mape_pmset_mean","val_mape_pmset_median","val_mape_pmset_sd",
"val_mape_pseq_mean","val_mape_pseq_median","val_mape_pseq_sd"
)
mape_val
#RMSPE(y_pred, y_true)
rmspe_val <- c(
RMSPE(incidentevtlog_anot_err$remaining_stc_pset_mean, incidentevtlog_anot_err$remaining_stc),
RMSPE(incidentevtlog_anot_err$remaining_stc_pset_median, incidentevtlog_anot_err$remaining_stc),
RMSPE(incidentevtlog_anot_err$remaining_stc_pset_sd, incidentevtlog_anot_err$remaining_stc),
RMSPE(incidentevtlog_anot_err$remaining_stc_pmset_mean, incidentevtlog_anot_err$remaining_stc),
RMSPE(incidentevtlog_anot_err$remaining_stc_pmset_median, incidentevtlog_anot_err$remaining_stc),
RMSPE(incidentevtlog_anot_err$remaining_stc_pmset_sd, incidentevtlog_anot_err$remaining_stc),
RMSPE(incidentevtlog_anot_err$remaining_stc_pseq_mean, incidentevtlog_anot_err$remaining_stc),
RMSPE(incidentevtlog_anot_err$remaining_stc_pseq_median, incidentevtlog_anot_err$remaining_stc),
RMSPE(incidentevtlog_anot_err$remaining_stc_pseq_sd, incidentevtlog_anot_err$remaining_stc)
)
names(rmspe_val) <- c(
"val_rmspe_pset_mean","val_rmspe_pset_median","val_rmspe_pset_sd",
"val_rmspe_pmset_mean","val_rmspe_pmset_median","val_rmspe_pmset_sd",
"val_rmspe_pseq_mean","val_rmspe_pseq_median","val_rmspe_pseq_sd"
)
rmspe_val
#non fitting
non_fit_arr <- c(
nrow(fold_events),
nrow(events_anot),
nrow(events_anot[events_anot$set_state_id == NIL_STATE,]),
nrow(events_anot[events_anot$mset_state_id == NIL_STATE,]),
nrow(events_anot[events_anot$seq_state_id == NIL_STATE,]),
length(unique(events_anot$set_state_id)),
length(unique(events_anot$mset_state_id)),
length(unique(events_anot$seq_state_id))
)
names(non_fit_arr) <- c("num_evt_tot","num_evt_ok","num_evt_nf_set",
"num_evt_nf_mset","num_evt_nf_seq", "num_set_states",
"num_mset_states", "num_seq_states")
#print(non_fit_arr)
non_fit_per_arr <- c(
non_fit_arr[c("num_evt_nf_set")] / non_fit_arr[c("num_evt_ok")],
non_fit_arr[c("num_evt_nf_mset")] / non_fit_arr[c("num_evt_ok")],
non_fit_arr[c("num_evt_nf_seq")] / non_fit_arr[c("num_evt_ok")]
)
names(non_fit_per_arr) <- c("perr_nf_set","perr_nf_mset","perr_nf_seq")
non_fit_per_arr <- non_fit_per_arr * 100
# retorna o menor erro - media ou mediana
perr_tot_arr <- c(
min(mape_val[c("val_mape_pset_mean")], mape_val[c("val_mape_pset_median")]),
min(mape_val[c("val_mape_pmset_mean")], mape_val[c("val_mape_pmset_median")]),
min(mape_val[c("val_mape_pseq_mean")], mape_val[c("val_mape_pseq_median")])
)
names(perr_tot_arr) <- c(
"perr_tot_set","perr_tot_mset","perr_tot_seq"
)
perr_tot_arr
# filtro para eventos com fit
incidentevtlog_anot_err_set1 <- incidentevtlog_anot_err[incidentevtlog_anot_err$set_state_id != NIL_STATE,]
incidentevtlog_anot_err_mset1 <- incidentevtlog_anot_err[incidentevtlog_anot_err$mset_state_id != NIL_STATE,]
incidentevtlog_anot_err_seq1 <- incidentevtlog_anot_err[incidentevtlog_anot_err$seq_state_id != NIL_STATE,]
#MAPE(y_pred, y_true)
mape_val1 <- c(
MAPE(incidentevtlog_anot_err_set1$remaining_stc_pset_mean, incidentevtlog_anot_err_set1$remaining_stc),
MAPE(incidentevtlog_anot_err_set1$remaining_stc_pset_median, incidentevtlog_anot_err_set1$remaining_stc),
MAPE(incidentevtlog_anot_err_set1$remaining_stc_pset_sd, incidentevtlog_anot_err_set1$remaining_stc),
MAPE(incidentevtlog_anot_err_mset1$remaining_stc_pmset_mean, incidentevtlog_anot_err_mset1$remaining_stc),
MAPE(incidentevtlog_anot_err_mset1$remaining_stc_pmset_median, incidentevtlog_anot_err_mset1$remaining_stc),
MAPE(incidentevtlog_anot_err_mset1$remaining_stc_pmset_sd, incidentevtlog_anot_err_mset1$remaining_stc),
MAPE(incidentevtlog_anot_err_seq1$remaining_stc_pseq_mean, incidentevtlog_anot_err_seq1$remaining_stc),
MAPE(incidentevtlog_anot_err_seq1$remaining_stc_pseq_median, incidentevtlog_anot_err_seq1$remaining_stc),
MAPE(incidentevtlog_anot_err_seq1$remaining_stc_pseq_sd, incidentevtlog_anot_err_seq1$remaining_stc)
)
names(mape_val1) <- c(
"val_mape_pset_mean1","val_mape_pset_median1","val_mape_pset_sd1",
"val_mape_pmset_mean1","val_mape_pmset_median1","val_mape_pmset_sd1",
"val_mape_pseq_mean1","val_mape_pseq_median1","val_mape_pseq_sd1"
)
mape_val1
rmspe_val1 <- c(
RMSPE(incidentevtlog_anot_err_set1$remaining_stc_pset_mean, incidentevtlog_anot_err_set1$remaining_stc),
RMSPE(incidentevtlog_anot_err_set1$remaining_stc_pset_median, incidentevtlog_anot_err_set1$remaining_stc),
RMSPE(incidentevtlog_anot_err_set1$remaining_stc_pset_sd, incidentevtlog_anot_err_set1$remaining_stc),
RMSPE(incidentevtlog_anot_err_mset1$remaining_stc_pmset_mean, incidentevtlog_anot_err_mset1$remaining_stc),
RMSPE(incidentevtlog_anot_err_mset1$remaining_stc_pmset_median, incidentevtlog_anot_err_mset1$remaining_stc),
RMSPE(incidentevtlog_anot_err_mset1$remaining_stc_pmset_sd, incidentevtlog_anot_err_mset1$remaining_stc),
RMSPE(incidentevtlog_anot_err_seq1$remaining_stc_pseq_mean, incidentevtlog_anot_err_seq1$remaining_stc),
RMSPE(incidentevtlog_anot_err_seq1$remaining_stc_pseq_median, incidentevtlog_anot_err_seq1$remaining_stc),
RMSPE(incidentevtlog_anot_err_seq1$remaining_stc_pseq_sd, incidentevtlog_anot_err_seq1$remaining_stc)
)
names(rmspe_val1) <- c(
"val_rmspe_pset_mean1","val_rmspe_pset_median1","val_rmspe_pset_sd1",
"val_rmspe_pmset_mean1","val_rmspe_pmset_median1","val_rmspe_pmset_sd1",
"val_rmspe_pseq_mean1","val_rmspe_pseq_median1","val_rmspe_pseq_sd1"
)
rmspe_val1
#non_fit_arr
result <- rbind(
result,
c(mape_val, rmspe_val, non_fit_arr, non_fit_per_arr, perr_tot_arr,
mape_val1, rmspe_val1)
)
}
return(result)
}
#' Title
#'
#' @param data
#' @param groupvars
#' @param measurevar
#' @param na.rm
#' @param conf.interval
#' @param .drop
#'
#' @return
#' @export
#'
#' @examples
#' summary_set <- gen_summary_pred_fn(incidentevtlog_anot_st, 'set_state_id','remaining_stc')
gen_summary_pred_fn <- function(data=NULL, groupvars=NULL, measurevar, na.rm=TRUE,
conf.interval=.95, .drop=TRUE) {
# New version of length which can handle NA's: if na.rm==T, don't count them
length2 <- function (x, na.rm=FALSE) {
if (na.rm) sum(!is.na(x))
else length(x)
}
# This does the summary. For each group's data frame, return a vector with
# N, mean, and sd
datac <- ddply(data, groupvars, .drop=.drop,
.fun = function(xx, col) {
c(N = length2(xx[[col]], na.rm=na.rm),
mean = ceiling(mean (xx[[col]], na.rm=na.rm)),
sd = ceiling(sd (xx[[col]], na.rm=na.rm)),
median = ceiling(median (xx[[col]], na.rm=na.rm)),
min = ceiling(min (xx[[col]], na.rm=na.rm)),
max = ceiling(max (xx[[col]], na.rm=na.rm))
)
},
measurevar
)
# Rename the "mean" column
#datac <- rename(datac, c("mean" = measurevar))
datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean
# Confidence interval multiplier for standard error
# Calculate t-statistic for confidence interval:
# e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
ciMult <- qt(conf.interval/2 + .5, datac$N-1)
datac$ci <- datac$se * ciMult
# registro para valores não encontrados #non_fitting
datac <- rbind(datac,
c(NIL_STATE, sum(datac$N), mean(datac$mean), sd(datac$mean),
median(datac$mean), min(datac$mean), max(datac$mean))
)
return(datac)
}
#' Title
#'
#' @param fold_events
#' @param resultFile
#' @param type
#' @param fold
#' @param horiz
#'
#' @return
#' @export
#'
#' @examples
#'
#' This function was extrated from the original function
#' #eval_model_gen_fn <- function(lsel_traces_list)
#' #eval_model_gen_fn <- function(events)
#'
annotate_model <- function(fold_events, resultFile, type, fold, horiz)
{
summary_pred_stats <- NULL
result <- NULL
events_anot <- as.data.frame(
fold_events[, c("number", "updated_at", "incident_state", "seq_state_id","set_state_id", "mset_state_id",
"sojourn_set_stc","sojourn_mset_stc","sojourn_seq_stc","elapsed_stc", "remaining_stc")]
)
# filtrar os valores que sao estados finais pois distorcem a media
events_anot_filtered <- events_anot[events_anot$remaining_stc > 0,]
summary_set <- gen_summary_pred_fn(events_anot_filtered, 'set_state_id','remaining_stc')
summary_mset <- gen_summary_pred_fn(events_anot_filtered, 'mset_state_id','remaining_stc')
summary_seq <- gen_summary_pred_fn(events_anot_filtered, 'seq_state_id','remaining_stc')
summary_sj_set <- gen_summary_pred_fn(events_anot_filtered, 'set_state_id','sojourn_set_stc')
summary_sj_mset <- gen_summary_pred_fn(events_anot_filtered, 'mset_state_id','sojourn_mset_stc')
summary_sj_seq <- gen_summary_pred_fn(events_anot_filtered, 'seq_state_id','sojourn_seq_stc')
#armazena totais
summary_pred_stats <- list(summary_set, summary_mset, summary_seq,
summary_sj_set, summary_sj_mset, summary_sj_seq)
# atualiza predited values media, mediana e desvio padrão
# set
events_anot$remaining_stc_set_state_mean <-
summary_set$mean[match(events_anot$set_state_id, summary_set$set_state_id)]
events_anot$sojourn_set_state_mean <-
summary_sj_set$mean[match(events_anot$set_state_id, summary_sj_set$set_state_id)]
# multi set
events_anot$remaining_stc_mset_state_mean <-
summary_mset$mean[match(events_anot$mset_state_id, summary_mset$mset_state_id)]
events_anot$sojourn_mset_state_mean <-
summary_sj_mset$mean[match(events_anot$mset_state_id, summary_sj_mset$mset_state_id)]
# sequence
events_anot$remaining_stc_seq_state_mean <-
summary_seq$mean[match(events_anot$seq_state_id, summary_seq$seq_state_id)]
events_anot$sojourn_seq_state_mean <-
summary_sj_seq$mean[match(events_anot$seq_state_id, summary_sj_seq$seq_state_id)]
# prediction based in the mean and sojourn
#calculate_prediction_f1(events_anot)
# prediction based only in the mean
calculate_prediction_f2(events_anot)
# remove valorers sem match para calculo erro
# funcao remove todas as linhas que tiverem alguma coluna missing (nula)
events_anot_filtered <- na.omit(events_anot)
# remove valores dos estados finais Target = 0 que distorcem a media
# valores do ultimo estado serao sempre precisos
events_anot_filtered <- events_anot_filtered[events_anot_filtered$remaining_stc > 0,]
# calculo erro MAPE p todos os registros
#MAPE(y_pred, y_true)
mape_val <- c(
MAPE(events_anot_filtered$remaining_stc_pset_mean, events_anot_filtered$remaining_stc),
MAPE(events_anot_filtered$remaining_stc_pmset_mean, events_anot_filtered$remaining_stc),
MAPE(events_anot_filtered$remaining_stc_pseq_mean, events_anot_filtered$remaining_stc)
)
names(mape_val) <- c("val_mape_pset_mean","val_mape_pmset_mean","val_mape_pseq_mean")
#non fitting
non_fit_arr <- c(
nrow(fold_events),
nrow(events_anot),
nrow(events_anot[events_anot$set_state_id == NIL_STATE,]),
nrow(events_anot[events_anot$mset_state_id == NIL_STATE,]),
nrow(events_anot[events_anot$seq_state_id == NIL_STATE,]),
length(unique(events_anot$set_state_id)),
length(unique(events_anot$mset_state_id)),
length(unique(events_anot$seq_state_id))
)
names(non_fit_arr) <- c("num_evt_tot","num_evt_ok","num_evt_nf_set",
"num_evt_nf_mset","num_evt_nf_seq", "num_set_states",
"num_mset_states", "num_seq_states")
non_fit_per_arr <- c(
non_fit_arr[c("num_evt_nf_set")] / non_fit_arr[c("num_evt_ok")],
non_fit_arr[c("num_evt_nf_mset")] / non_fit_arr[c("num_evt_ok")],
non_fit_arr[c("num_evt_nf_seq")] / non_fit_arr[c("num_evt_ok")]
)
names(non_fit_per_arr) <- c("perr_nf_set","perr_nf_mset","perr_nf_seq")
non_fit_per_arr <- non_fit_per_arr * 100
# Alexandre: precisa remover esse cálculo aqui, não faz nenhum sentido repetir esse valor
# vide comentário na função original - isso era um mínimo entre média e mediana
perr_tot_arr <- c(mape_val[c("val_mape_pset_mean")],
mape_val[c("val_mape_pmset_mean")],
mape_val[c("val_mape_pseq_mean")])
names(perr_tot_arr) <- c("perr_tot_set","perr_tot_mset","perr_tot_seq")
# filtro para eventos com fit
incidentevtlog_anot_err_set1 <- events_anot_filtered[events_anot_filtered$set_state_id != NIL_STATE,]
incidentevtlog_anot_err_mset1 <- events_anot_filtered[events_anot_filtered$mset_state_id != NIL_STATE,]
incidentevtlog_anot_err_seq1 <- events_anot_filtered[events_anot_filtered$seq_state_id != NIL_STATE,]
mape_val1 <- c(
MAPE(incidentevtlog_anot_err_set1$remaining_stc_pset_mean, incidentevtlog_anot_err_set1$remaining_stc),
MAPE(incidentevtlog_anot_err_mset1$remaining_stc_pmset_mean, incidentevtlog_anot_err_mset1$remaining_stc),
MAPE(incidentevtlog_anot_err_seq1$remaining_stc_pseq_mean, incidentevtlog_anot_err_seq1$remaining_stc)
)
names(mape_val1) <- c("val_mape_pset_mean1", "val_mape_pmset_mean1", "val_mape_pseq_mean1")
# appends to the file that contains all the stats so the error can be recalculated later
events_anot <- cbind(type,fold,horiz,events_anot)
write.table(events_anot, file=resultFile, row.names=FALSE, col.names = TRUE, append=TRUE, sep=";", dec=",")
# returns only the summarized results for the given fold and horizon
result <- c(fold=type, horizon=horiz, mape_val, non_fit_arr, non_fit_per_arr, perr_tot_arr, mape_val1)
return(result)
}
#' Fórmula 1 para o cálculo da predição, que leva em conta o sojourn atual e a média
#' de sojourn no estado. É a fórmula do código original.
#'
#' @param events_anot
#'
#' @return
#' @export
#'
#' @examples
calculate_prediction_f1 <- function(aevents_anot) {
events_anot <- aevents_anot
events_anot$remaining_stc_pset_mean <-
events_anot$remaining_stc_set_state_mean +
events_anot$sojourn_set_state_mean -
events_anot$sojourn_set_stc
events_anot$remaining_stc_pmset_mean <-
events_anot$remaining_stc_mset_state_mean +
events_anot$sojourn_mset_state_mean -
events_anot$sojourn_mset_stc
events_anot$remaining_stc_pseq_mean <-
events_anot$remaining_stc_seq_state_mean +
events_anot$sojourn_seq_state_mean -
events_anot$sojourn_seq_stc
eval.parent(substitute(aevents_anot<-events_anot))
}
#' Fórmula 2, que leva em conta apenas a média do remaining.
#'
#' @param events_anot
#'
#' @return
#' @export
#'
#' @examples
calculate_prediction_f2 <- function(aevents_anot) {
events_anot <- aevents_anot
events_anot$remaining_stc_pset_mean <- events_anot$remaining_stc_set_state_mean
events_anot$remaining_stc_pmset_mean <- events_anot$remaining_stc_mset_state_mean
events_anot$remaining_stc_pseq_mean <- events_anot$remaining_stc_seq_state_mean
eval.parent(substitute(aevents_anot<-events_anot))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.