This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

#load packages
library(batchanalytics)
library(bupaR)
library(bamalog)
library(tidyr)
library(readr)
library(lubridate)
# Choose the event log to transform. Only one assignment to `df` is active;
# the alternatives are kept commented out so the log can be switched quickly.
# Author's note: `patients` is artificial, but it has start and end times.


# Author's note: other logs considered are sepsis and road traffic (fines).

#df <- patients

#df <- sepsis

#df <- hospital_billing

# Active choice. `traffic_fines` is not defined in this file; presumably it is
# a dataset provided by one of the packages loaded above -- TODO confirm.
df <- traffic_fines


# TODO: check the batchanalytics sample for a different way to read the data in, so that after a git import everyone can run this without a hard-coded path.
#setwd("C:\\Users\\Niklas\\Desktop\\BachelorArbeit\\EventLogs\\real_world_event_logs")

# Use the batchanalytics library - check that it is the latest version.



#csv_log <- read.csv("1k_Filtered data of BPI Challenge 2017.csv")



#print(head(csv_log))
# Add the three time columns. Each one starts out as the same GMT parse of
# `timestamp`, so the value is parsed once and then copied.
df$arrival <- as.POSIXct(
  df$timestamp,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "GMT"
)
df$start <- df$arrival
df$complete <- df$arrival



head(df)
names(df)

# Build the working table.
# Columns are selected by NAME rather than by position, so a log with a
# different column layout (e.g. after switching `df` to another dataset) fails
# loudly instead of silently picking the wrong columns.
# The order matters: the first seven columns are later exported as
#   "case_id", "activity", "resource", "arrival", "start", "complete",
#   "instance_id"
# (`timestamp` takes the "complete" slot). `lifecycle` comes last; it is only
# needed to construct the event log.
event_cols <- c(
  "case_id", "activity", "resource", "arrival", "start",
  "timestamp", "activity_instance_id", "lifecycle"
)
missing_cols <- setdiff(event_cols, names(df))
if (length(missing_cols) > 0) {
  stop(
    "Event log is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}
# cbind() of one-column slices, exactly as before, just addressed by name.
newtable <- do.call(cbind, lapply(event_cols, function(col) df[col]))
head(newtable)

# Shift the synthetic times. `df$arrival` and `df$start` both hold the parsed
# `timestamp`, so arrival becomes timestamp - 10 min and start becomes
# timestamp - 5 min, i.e. arrival is 5 minutes before start.
# POSIXct arithmetic is in seconds, and the result is already POSIXct.
newtable$arrival <- df$arrival - 10 * 60

newtable$start <- df$start - 5 * 60


head(newtable)
# Step 1: declare which columns of `newtable` play which event-log role.
t1 <- eventlog(
  newtable,
  case_id = "case_id",
  activity_id = "activity",
  activity_instance_id = "activity_instance_id",
  lifecycle_id = "lifecycle",
  timestamp = "timestamp", # previously "complete"
  resource_id = "resource"
)

# Step 2: apply the trace-frequency filter with percentage = 0.15
# (author's intent: keep the most frequent cases).
t1 <- filter_trace_frequency(t1, percentage = 0.15)

evaluate batching

# Build the export table from the first seven columns of the filtered log.
# Target column layout:
#   "case_id", "activity", "resource", "arrival", "start", "complete",
#   "instance_id"
task_log <- do.call(cbind, lapply(seq_len(7L), function(i) t1[i]))

head(task_log)

# Rename to the target layout (author's note: converted like the BPIC 2017 log).
colnames(task_log) <- c(
  "case_id", "activity", "resource", "arrival",
  "start", "complete", "instance_id"
)
head(t1)

class(t1)

#write transformed log to csv

#write.csv(df, "C:\\Users\\Niklas\\Desktop\\BachelorArbeit\\EventLogs\\real_world_event_logs\\sepsis_filtered.csv", row.names = FALSE)




# TODO: move this code into a function - refactoring would remove the duplicated code.
# Maybe save on GitHub: a short description of how the event log was created and transformed.
# write

# Directory that holds the exported event logs. It is defined once so the
# machine-specific location only has to be changed in a single place.
# NOTE(review): this is still an absolute path on the original author's
# machine -- adjust it before running elsewhere.
log_dir <- "C:\\Users\\Niklas\\Desktop\\BachelorArbeit\\EventLogs\\real_world_event_logs"

# fsep = "\\" keeps the resulting paths identical to the ones used before.
traffic_path <- file.path(log_dir, "traffic.csv", fsep = "\\")
ready_path <- file.path(
  log_dir, "traffic_fines_ready_for_analysis.csv",
  fsep = "\\"
)

# Check the target before executing: this overwrites an existing file.
write.csv(task_log, traffic_path, row.names = FALSE)

# Kept for backward compatibility: code outside this section may rely on the
# working directory being the log directory.
setwd(log_dir)


# NOTE(review): this reads "traffic_fines_ready_for_analysis.csv", not the
# "traffic.csv" written above -- presumably that file was prepared
# separately; confirm.
# stringsAsFactors = FALSE guarantees that a text `resource` column is never a
# factor, so assigning the new value "ResX" below cannot fail on R < 4.0.
csv_log <- read.csv(ready_path, stringsAsFactors = FALSE)

#class(csv_log)
# Replace missing resources with the placeholder "ResX".
csv_log$resource[is.na(csv_log$resource)] <- "ResX"


head(csv_log)
# Write the cleaned log back to the file it was read from.

# Check the target before executing: this overwrites the input file.
write.csv(csv_log, ready_path, row.names = FALSE)

check bama reading



NiklasCarlos1994/batchanalytics documentation built on Dec. 17, 2021, 5:25 a.m.