#' Load the event-logs
#'
#' @description A loader for csv based log files. It also calculates the footprint table, transition matrix probabilities, and presents data in different shapes. The public methods are:
#' \itemize{
#' \item \code{dataLoader() } the costructor
#' \item \code{load.csv( ... ) } loads the csv file into the \code{dataLoader} object
#' \item \code{load.data.frame() } loads a data.frame into the \code{dataLoader} object
#' \item \code{getData() } return the processed, previously-loaded, data
#' \item \code{removeEvents() } remove the events in the array passed as argument (dual of \code{dataLoader::keepOnlyEvents()} )
#' \item \code{keepOnlyEvents() } keep only the events in the array passed as argument (dual of \code{dataLoader::removeEvents()} )
#' \item \code{addDictionary() } add a dictionary in order, afterward, to translate or group some event name
#' \item \code{getTranslation() } perform a translation applying the given dictionary to the loaded csv or data.frame
#' \item \code{plot.Timeline() } plot the timeline of the events regarding a single patient
#' }
#' In order to better undestand the use of such methods, please visit: www.pminer.info
#'
#' The consturctor admit the following parameters:
#' verbose.mode are some notification wished, during the computation? The defaul value is \code{true}
#' @param verbose.mode boolean. If TRUE some messages will appear in console, during the computation; otherwise the computation will be silent.
#' @param save.memory boolean. If TRUE, dataLoader() avoid to keep in memory the entire original csv.
#' @param max.char.length.label numeric. It defines the max length of the event name strings
#' @import stats progress R.utils
#' @importFrom data.table data.table
#' @importFrom lubridate hour hours minute minutes week weeks month day days
#' @importFrom stringr str_replace_all str_replace str_locate_all str_locate str_c str_trim str_sub str_trim
#' @export
#' @useDynLib pMineR
#' @examples \dontrun{
#'
#' # create a Loader
#' obj.L<-dataLoader();
#'
#' # Load a .csv
#' obj.L$load.csv(nomeFile = "../otherFiles/mammella.csv",
#' IDName = "CODICE_SANITARIO_ADT",
#' EVENTName = "DESC_REPARTO_RICOVERO",
#' dateColumnName = "DATA_RICOVERO")
#'
#' # return the results
#' obj.L$getData()
#'
#' }
dataLoader<-function( verbose.mode = TRUE, max.char.length.label = 50, save.memory = FALSE ) {
arrayAssociativo<-''
footPrint<-''
MMatrix<-''
pat.process<-''
wordSequence.raw<-''
MM.mean.time<-''
MM.mean.outflow.time<-''
MM.density.list<-''
MM.den.list.high.det<-''
list.dictionary<-''
list.dict.column.event.name<-''
input.format.date<-''
max.pMineR.internal.ID.Evt <-''
original.CSV <- ''
# print(max.char.length.label)
param.IDName<-''
param.EVENTName<-''
param.dateColumnName<-''
param.verbose<-''
param.max.char.length.label<-'';
param.column.names<-''
param.save.memory<-'';
obj.LH<-''
global.personal.ID<-NA
#=================================================================================
# clearAttributes
# this method clear all the attributes in order to make the object re-useable
# for other issues ( dirty variables could have dramatic effetcs! )
#=================================================================================
clearAttributes<-function() {
costructor( verboseMode = param.verbose , max.char.length.label = param.max.char.length.label,saveMemory = param.save.memory )
}
#=================================================================================
# addDictionary
#=================================================================================
addDictionary<-function( fileName, sep =',', dict.name='main' , column.event.name) {
list.dictionary[[ dict.name ]] <<- read.csv(fileName,header = T,sep = sep)
list.dict.column.event.name[[ dict.name ]] <<- column.event.name
}
#=================================================================================
# addDataDescription
#=================================================================================
addDataDescription<-function( fileName, dataDescriptionName="default" ) {
list.dataDescription[[ dataDescriptionName ]] <<- xmlInternalTreeParse(fileName)
# Cerca la lista delle trasformazioni da operare
listaTrasformazioni<-xpathApply(doc,'//virtualAttribute',xmlValue)
aaa <- xpathApply(doc,'//virtualAttribute/rule[@name=""]',xmlValue)
browser()
ct <- 1
}
#=================================================================================
# getTranslation
#=================================================================================
getTranslation<-function( column.name , dict.name = 'main', toReturn="csv") {
# Se era stato indicato un dizionario (e la relativa colonna) caricalo
# e popola una colonna aggiuntiva
new.myData<-c()
if(param.verbose == TRUE) obj.LH$sendLog(" 1) Converting the Events for all the patients :\n")
if(param.verbose == TRUE) pb <- txtProgressBar(min = 0, max = length(names(pat.process)), style = 3)
pb.ct <- 0
for(idPaz in names(pat.process)) {
pb.ct <- pb.ct + 1;
if(param.verbose == TRUE) setTxtProgressBar(pb, pb.ct)
matrice<-pat.process[[idPaz]]
names(matrice)<-names(pat.process[[idPaz]])
aaa<-as.character(pat.process[[idPaz]][[param.EVENTName]])
bbb<-unlist(lapply(aaa, function(x) {
# prendi la voce corrispondente al nome dell'evento
column.event.name<-list.dict.column.event.name[[ dict.name ]]
arrPosizioniTMP<-which(list.dictionary[[ dict.name ]][[ column.event.name ]]==x )
if(length(arrPosizioniTMP)>1) stop("Error! an Event is associated to more possible new Event names!")
# e sostituisci
if(length(arrPosizioniTMP)==0) return( "" )
else return(as.character( list.dictionary[[ dict.name ]][[ column.name ]][arrPosizioniTMP]) )
} ))
# if(param.verbose==TRUE) cat("\n Grouping now the events of the patient: ",idPaz)
matrice[[param.EVENTName]] <- bbb
matrice <- matrice[ which(matrice[[param.EVENTName]]!="") , ]
new.myData <- rbind(new.myData,matrice)
}
if(param.verbose == TRUE) close(pb)
if(toReturn=="csv") { daRestituire <- new.myData }
if(toReturn=="dataLoader"){
if(param.verbose == TRUE) obj.LH$sendLog(" 2) Create a new dataLoader object (this splits in many steps) :\n")
# Istanzia un oggetto dataLoader che eridita il parametro "verbose"
daRestituire<-dataLoader()
daRestituire$load.data.frame(mydata = new.myData,
IDName = param.IDName,EVENTName = param.EVENTName,
dateColumnName = param.dateColumnName,format.column.date = "%d/%m/%Y %H:%M:%S")
}
return(daRestituire)
}
#=================================================================================
# ricalcolaCSV
# Ricalcola il CSV togliendo pazienti e/o eventi a piacere
#=================================================================================
ricalcolaCSV<-function(
array.events.to.remove=c(),
array.events.to.keep=c(),
array.pazienti.to.remove=c(),
array.pazienti.to.keep=c(),
remove.patients.by.attribute.name = NA,
remove.events.by.attribute.name = NA,
keep.events.by.attribute.name = NA,
keep.patients.by.attribute.name = NA,
by.arr.attribute.value= c() ,
is.debug = FALSE
) {
matriciona <- c()
# Costruisci il super CSV
# browser()
if(param.save.memory == FALSE) CSV.completo <- original.CSV
else CSV.completo <- do.call(rbind, pat.process)
# array.pazienti.to.keep
if(length(array.pazienti.to.keep)>0) {
CSV.completo <- CSV.completo[ CSV.completo[ ,param.IDName] %in% array.pazienti.to.keep, ]
}
# array.pazienti.to.remove
if(length(array.pazienti.to.remove)>0) {
CSV.completo <- CSV.completo[ !(CSV.completo[ ,param.IDName] %in% array.pazienti.to.remove), ]
}
# array.events.to.remove e array.events.to.keep
posizione.colonna.evento <- which(colnames(pat.process[[1]]) == param.EVENTName) -1
if(length(array.events.to.remove)>0 | length(array.events.to.keep)>0) {
res <- filterPatProcess( CSV.completo, c(":)",array.events.to.remove), c(":)",array.events.to.keep) , posizione.colonna.evento )
CSV.completo <- CSV.completo[res$rigaDaTenere,]
}
# 'remove.events.by.attribute.name' e 'keep.events.by.attribute.name'
# Rimuovi i record in cui una colonna specifica ha il valore indicato.
# Nome della colonna e valori sono passati in due array dalle posizioni corrispondenti
if( !is.na(remove.events.by.attribute.name) ) {
for( iii in seq(1,length(remove.events.by.attribute.name))) {
CSV.completo <- CSV.completo[ which(CSV.completo[, remove.events.by.attribute.name[iii] ] == by.arr.attribute.value[iii]), ]
}
}
if( !is.na(keep.events.by.attribute.name) ) {
for( iii in seq(1,length(remove.events.by.attribute.name))) {
CSV.completo <- CSV.completo[ which(CSV.completo[, remove.events.by.attribute.name[iii] ] != by.arr.attribute.value[iii]), ]
}
}
# 'remove.patients.by.attribute.name' e 'keep.patients.by.attribute.name'
# Rimuovi i record in cui una colonna specifica ha il valore indicato.
# Nome della colonna e valori sono passati in due array dalle posizioni corrispondenti
if(!is.na(remove.patients.by.attribute.name)) {
for( iii in seq(1,length(remove.patients.by.attribute.name))) {
lista.pazienti <- unique(CSV.completo[ which(CSV.completo[, remove.patients.by.attribute.name[iii] ] == by.arr.attribute.value[iii]), param.IDName])
CSV.completo <- CSV.completo[ which( !(CSV.completo[, param.IDName ] %in% lista.pazienti)), ]
}
}
if(!is.na(keep.patients.by.attribute.name)) {
# browser()
for( iii in seq(1,length(keep.patients.by.attribute.name))) {
lista.pazienti <- unique(CSV.completo[ which(CSV.completo[, keep.patients.by.attribute.name[iii] ] == by.arr.attribute.value[iii]), param.IDName])
CSV.completo <- CSV.completo[ which(CSV.completo[, param.IDName ] %in% lista.pazienti), ]
}
}
return(CSV.completo)
}
old.old.ricalcolaCSV<-function(
array.events.to.remove=c(),
array.events.to.keep=c(),
array.pazienti.to.remove=c(),
array.pazienti.to.keep=c(),
remove.patients.by.attribute.name = NA,
remove.events.by.attribute.name = NA,
keep.events.by.attribute.name = NA,
keep.patients.by.attribute.name = NA,
by.arr.attribute.value= c() ,
is.debug = FALSE
) {
matriciona <- c()
# Costruisci la lista dei pazienti da analizzare nel caso in cui sia stato
# dichiarato esplicitamente quali tenere
ID.Pazienti.Validi<-names(pat.process)
if( length(array.pazienti.to.keep) > 0 ) { ID.Pazienti.Validi <- array.pazienti.to.keep }
else {ID.Pazienti.Validi <- names(pat.process)[ !( names(pat.process) %in% array.pazienti.to.remove ) ] }
if(param.verbose == TRUE) obj.LH$sendLog(" 0) Cleaning original dataset :\n")
if(param.verbose == TRUE) pb <- txtProgressBar(min = 0, max = length(ID.Pazienti.Validi), style = 3)
pb.ct <- 1
# if(is.debug==TRUE) browser();
# loopa sui pazienti validi
for(patID in ID.Pazienti.Validi ) {
# cat("\n",patID)
skip.patient <- FALSE
pb.ct <- pb.ct + 1;
if(param.verbose == TRUE) setTxtProgressBar(pb, pb.ct)
# browser()
# EVENTS --------------- (remove events)
if(length(array.events.to.remove)>0) {
submatrix <- pat.process[[patID]][ which( !(pat.process[[patID]][ ,param.EVENTName ] %in% array.events.to.remove )), param.column.names]
}
if(length(array.events.to.keep)>0) {
submatrix <- pat.process[[patID]][ which( (pat.process[[patID]][ ,param.EVENTName ] %in% array.events.to.keep )), param.column.names]
}
# if(length(array.events.to.keep)==0 & length(array.events.to.remove)==0) {
# submatrix <- pat.process[[patID]][ , param.column.names]
# }
# ATTRIBUTE NAME --------------- (remove events)
if( !is.na(remove.events.by.attribute.name) ) {
# browser()
submatrix <- pat.process[[patID]][ which( !(pat.process[[patID]][ ,remove.events.by.attribute.name ] %in% by.arr.attribute.value )), param.column.names]
}
if( !is.na(keep.events.by.attribute.name) ) {
# browser()
submatrix <- pat.process[[patID]][ which( (pat.process[[patID]][ ,keep.events.by.attribute.name ] %in% by.arr.attribute.value )), param.column.names]
}
# ATTRIBUTE NAME --------------- (remove patients)
if(!is.na(remove.patients.by.attribute.name)) {
if(length( which( (pat.process[[patID]][ ,remove.patients.by.attribute.name ] %in% by.arr.attribute.value )))>0) {
submatrix <- c()
}
else { submatrix <- pat.process[[patID]] }
}
if(!is.na(keep.patients.by.attribute.name)) {
if(length( which( (pat.process[[patID]][ ,keep.patients.by.attribute.name ] %in% by.arr.attribute.value )))==0) {
submatrix <- c()
}
else { submatrix <- pat.process[[patID]] }
}
# KEEP PATIENTS
if(length(array.pazienti.to.keep)>0) {
if( patID %in% array.pazienti.to.keep) submatrix <- pat.process[[patID]]
}
# if(pat.process[[patID]][ ,"TRIAGE" ][ 1 ]=="Yellow" ) browser()
# if(skip.patient==FALSE) {
matriciona <- rbind( matriciona, submatrix )
# }
}
if(param.verbose == TRUE) close(pb)
return(matriciona)
}
#=================================================================================
# applyFilter
#=================================================================================
applyFilter<-function(
array.events.to.keep=c(),
array.events.to.remove=c(),
array.pazienti.to.keep=c(),
array.pazienti.to.remove=c(),
remove.events.by.attribute.name = NA,
remove.patients.by.attribute.name = NA,
keep.events.by.attribute.name = NA,
keep.patients.by.attribute.name = NA,
by.arr.attribute.value = c(),
whatToReturn="itself",
is.debug=FALSE) {
if(!(whatToReturn %in% c( "itself" , "csv" ,"dataLoader" ) ) ) {
obj.LH$sendLog( c(" 'whatToReturn can only be 'itself', 'csv' or 'dataLoader'! ") ,"ERR"); return()
}
matriciona <- as.data.frame(ricalcolaCSV( array.events.to.remove = array.events.to.remove,
array.events.to.keep = array.events.to.keep,
array.pazienti.to.remove = array.pazienti.to.remove,
array.pazienti.to.keep = array.pazienti.to.keep,
remove.patients.by.attribute.name = remove.patients.by.attribute.name,
remove.events.by.attribute.name = remove.events.by.attribute.name,
keep.events.by.attribute.name = keep.events.by.attribute.name,
keep.patients.by.attribute.name = keep.patients.by.attribute.name,
by.arr.attribute.value = by.arr.attribute.value,
is.debug = is.debug
))
# browser()
IDName <- param.IDName
EVENTName <- param.EVENTName
dateColumnName <- param.dateColumnName
if( whatToReturn == "itself"){
load.data.frame( mydata = matriciona, IDName = IDName, EVENTName = EVENTName,
dateColumnName = dateColumnName , format.column.date = "%d/%m/%Y %H:%M:%S",
convertUTF = FALSE, suppress.invalid.date = FALSE)
}
if( whatToReturn == "csv"){
return(matriciona);
}
if( whatToReturn == "dataLoader"){
newObj <- dataLoader();
newObj$load.data.frame( mydata = matriciona, IDName = IDName, EVENTName = EVENTName,
dateColumnName = dateColumnName , format.column.date = "%d/%m/%Y %H:%M:%S",
convertUTF = FALSE, suppress.invalid.date = FALSE)
return(newObj);
}
}
#=================================================================================
# removeEvents
# array.events: the array of Events to remove
# min.abs.freq: the threshold to keep an event (absolute frequences): NOT YET IMPLEMENTED
#=================================================================================
removeEvents<-function( array.events=NA) {
matriciona <- as.data.frame(ricalcolaCSV( array.events.to.remove = array.events))
IDName <- param.IDName
EVENTName <- param.EVENTName
dateColumnName <- param.dateColumnName
load.data.frame( mydata = matriciona, IDName = IDName, EVENTName = EVENTName,
dateColumnName = dateColumnName , format.column.date = "%d/%m/%Y %H:%M:%S",
convertUTF = FALSE, suppress.invalid.date = FALSE)
# browser()
#
#
#
# bbb<-array.events
#
# # (1) arrayAssociativo
# arrayAssociativo<<-arrayAssociativo[!(arrayAssociativo %in% bbb)]
# if(is.matrix(footPrint)) {
# footPrint<<-footPrint[ !(rownames(footPrint) %in% bbb),!(colnames(footPrint) %in% bbb) ]
# }
#
# # (2) MMatrix
# MMatrix<<-MMatrix[ !(rownames(MMatrix) %in% bbb),!(colnames(MMatrix) %in% bbb) ]
#
# # (3) MM.mean.time
# MM.mean.time<<- MM.mean.time[ !(rownames(MM.mean.time) %in% bbb),!(colnames(MM.mean.time) %in% bbb) ]
#
# # (4) wordSequence.raw
# # (5) pat.process
# new.list.density<-list()
# for(i in seq(1,length(pat.process))) {
# pat.process[[i]]<<-pat.process[[i]][which(!(pat.process[[i]][[param.EVENTName]] %in% array.events)),]
# wordSequence.raw[[i]]<<-wordSequence.raw[[i]][ !( wordSequence.raw[[i]] %in% array.events)]
# }
#
# # (6) MM.density.list
# for( name.from in names(MM.density.list)) {
# if( !(name.from %in% array.events)) {
# if(is.null(new.list.density[[name.from]])) new.list.density[[name.from]]<-list()
# for( name.to in names(new.list.density[[name.from]])) {
# if(!(name.to %in% array.events )) {
# new.list.density[[name.from]][[name.to]]<-MM.density.list[[name.from]][[name.to]]
# }
# }
# }
# }
# MM.density.list<<-new.list.density
}
#=================================================================================
# keepOnlyEvents
# array.events: the array of Events to keep
#=================================================================================
keepOnlyEvents<-function( array.events=NA) {
# calcola quelli da togliere
eventi.da.togliere <- arrayAssociativo[!(arrayAssociativo %in% array.events)]
eventi.da.togliere <- eventi.da.togliere[!(eventi.da.togliere %in% c("BEGIN","END"))]
removeEvents(array.events = eventi.da.togliere)
}
#=================================================================================
# getAttribute
#=================================================================================
getAttribute<-function( attributeName ) {
if(attributeName=="pat.process") return( pat.process )
if(attributeName=="MMatrix.perc") {
MM<-MMatrix;
for( i in seq( 1 , nrow(MM)) ) {if(sum(MM[i,])>0) {MM[i,]<-MM[i,]/sum(MM[i,]);} }
return(MM);
}
if(attributeName=="MMatrix") return( MMatrix )
if(attributeName=="footPrint") return( footPrint )
if(attributeName=="MMatrix.perc.noLoop") {
MM<-MMatrix;
diag(MM)<-0;
for( i in seq( 1 , nrow(MM)) ) {if(sum(MM[i,])>0) {MM[i,]<-MM[i,]/sum(MM[i,]);} }
return(MM);
}
return();
}
#=================================================================================
# groupPatientLogActivity
# raggruppa i dati, come sono da CSV in una maniera piu' consona ad essere analizzati
#=================================================================================
groupPatientLogActivity<-function(mydata, ID.list.names) {
# prendi la lista di pazienti e
# per ogni paziente costruisci i gruppi
ID.list<-unique(mydata[[ID.list.names]])
ID.act.group<-list();
paziente.da.tenere<-c()
dimensioni.tabelle <- table(mydata[,ID.list.names])
# Fai lo split del data.frame in una lista di data.frame, rispetto al campo dell' ID
ID.act.group = split(mydata, list(mydata[[ID.list.names]]))
# prendi i pazienti da tenere (con almeno due eventi)
paziente.da.tenere <- names(dimensioni.tabelle)[which(dimensioni.tabelle>=2)]
return(
list(
"ID.act.group" = ID.act.group,
"paziente.da.tenere" = paziente.da.tenere
)
)
}
old.old.groupPatientLogActivity<-function(mydata, ID.list.names) {
# prendi la lista di pazienti e
# per ogni paziente costruisci i gruppi
ID.list<-unique(mydata[[ID.list.names]])
ID.act.group<-list();
paziente.da.tenere<-c()
if(param.verbose == TRUE) pb <- txtProgressBar(min = 0, max = length(ID.list), style = 3)
pb.ct <- 0
# browser()
for(i in ID.list) {
pb.ct <- pb.ct + 1;
if( param.verbose == TRUE ) setTxtProgressBar(pb, pb.ct)
# prendi i soli record che afferiscono al paziente in esame
ID.act.group[[i]]<-mydata[ which(mydata[[ID.list.names]]==i ), ]
if(nrow(ID.act.group[[i]])>2) {paziente.da.tenere <- c(paziente.da.tenere,i) }
}
# browser()
if(param.verbose == TRUE) close(pb)
return(
list(
"ID.act.group" = ID.act.group,
"paziente.da.tenere" = paziente.da.tenere
)
)
}
setData<-function( dataToSet ) {
# set the desired attribute (the ones passed as !is.na() )
nomiAttributi<-names(dataToSet)
if( "arrayAssociativo" %in% nomiAttributi ) arrayAssociativo<<-dataToSet$arrayAssociativo
if( "footPrint" %in% nomiAttributi ) footPrint<<-dataToSet$footPrint
if( "MMatrix" %in% nomiAttributi ) MMatrix<<-dataToSet$MMatrix
if( "pat.process" %in% nomiAttributi ) pat.process<<-dataToSet$pat.process
if( "wordSequence.raw" %in% nomiAttributi ) wordSequence.raw<<-dataToSet$wordSequence.raw
}
order.list.by.date<-function( listToBeOrdered, dateColumnName, deltaDate.column.name='pMineR.deltaDate',
format.column.date = "%d/%m/%Y %H:%M:%S" ) {
if(param.verbose == TRUE) pb <- txtProgressBar(min = 0, max = length(listToBeOrdered), style = 3)
# Cicla per ogni paziente
for( paziente in seq(1,length(listToBeOrdered)) ) {
if( param.verbose == TRUE ) setTxtProgressBar(pb, paziente)
# Estrai la matrice
matrice.date<-listToBeOrdered[[paziente]]
# Leggi la colonna data secondo la formattazione indicata in ingresso e riscrivila nel formato %d/%m/%Y (lo stesso viene fatto in plot.Timeline)
newdate <- strptime(as.character(matrice.date[,dateColumnName]), format.column.date)
matrice.date[,dateColumnName] <- format(newdate, "%d/%m/%Y %H:%M:%S")
# Calcola la colonna delle differenze di date rispetto ad una data di riferimento ed azzera rispetto al minore
# colonna.delta.date.TMPh898h98h9<-as.numeric(difftime(as.POSIXct(matrice.date[, dateColumnName], format = "%d/%m/%Y"),as.POSIXct("01/01/2001", format = "%d/%m/%Y"),units = 'days'))
colonna.delta.date.TMPh898h98h9<-as.numeric(difftime(as.POSIXct(matrice.date[, dateColumnName], format = "%d/%m/%Y %H:%M:%S"),as.POSIXct("01/01/2001 00:00:00", format = "%d/%m/%Y %H:%M:%S"),units = 'mins'))
colonna.delta.date.TMPh898h98h9<-colonna.delta.date.TMPh898h98h9-min(colonna.delta.date.TMPh898h98h9)
# Aggiungi la colonna dei delta data
listToBeOrdered[[paziente]]<-cbind(listToBeOrdered[[paziente]],colonna.delta.date.TMPh898h98h9)
colnames(listToBeOrdered[[paziente]])<-c(colnames(listToBeOrdered[[paziente]])[1:length(colnames(listToBeOrdered[[paziente]]))-1],deltaDate.column.name)
# Ordina il data.frame di ogni paziente per la colonna DeltaT
listToBeOrdered[[paziente]]<-listToBeOrdered[[paziente]][order(listToBeOrdered[[paziente]][[deltaDate.column.name]]),]
}
if(param.verbose == TRUE) close(pb)
return(listToBeOrdered);
}
load.data.frame<-function( mydata, IDName, EVENTName, dateColumnName=NA,
format.column.date = "%d/%m/%Y %H:%M:%S",
convertUTF = TRUE, suppress.invalid.date = TRUE) {
# clear all the attributes
obj.Utils <- utils()
clearAttributes( );
param.column.names<<-colnames(mydata)
# print(c("1",date()))
# browser()
# aaaaaaa <- mydata
if(length(mydata[[dateColumnName]]) == 0) { obj.LH$sendLog( c("dateColumnName '",dateColumnName,"' not present! ") ,"ERR"); return() }
if(length(mydata[[EVENTName]]) == 0) { obj.LH$sendLog( c("EVENTName '",EVENTName,"' not present! ") ,"ERR"); return() }
if(length(mydata[[IDName]]) == 0) { obj.LH$sendLog( c("IDName '",IDName,"' not present! ") ,"ERR"); return() }
obj.dataProcessor <- dataProcessor()
# Add an internal ID attribute to myData (to uniquely identify Logs)
if(!("pMineR.internal.ID.Evt" %in% colnames(mydata) ))
{ mydata <- cbind("pMineR.internal.ID.Evt"=seq(1,nrow(mydata)),mydata ) }
max.pMineR.internal.ID.Evt <<- max(mydata$pMineR.internal.ID.Evt)
# Change the DATA FORMAT!
mydata[[dateColumnName]] <- as.character(mydata[[dateColumnName]] )
mydata[[dateColumnName]] <- strptime(as.character(mydata[[dateColumnName]]), format.column.date)
mydata[[dateColumnName]] <- format(mydata[[dateColumnName]],"%d/%m/%Y %H:%M:%S")
format.column.date <- "%d/%m/%Y %H:%M:%S"
# print(c("2",date()))
if(suppress.invalid.date==TRUE) {
mydata <- mydata[ which(mydata[[dateColumnName]]!="" ),]
}
# Just to have then an idea of the passed parameters...
param.IDName<<-IDName
param.EVENTName<<-EVENTName
param.dateColumnName<<-dateColumnName
input.format.date<<- format.column.date
# ok, let's begin!
ID.list.names<-IDName
EVENT.list.names<-EVENTName
mydata[[EVENT.list.names]]<-as.character(mydata[[EVENT.list.names]])
if(convertUTF == TRUE) {
mydata <- obj.Utils$cleanUTF(mydata,EVENT.list.names)
mydata[[EVENT.list.names]] <- gsub("\"", "", mydata[[EVENT.list.names]])
mydata[[EVENT.list.names]] <- gsub("$", "", mydata[[EVENT.list.names]])
mydata[[EVENT.list.names]] <- gsub("'", "", mydata[[EVENT.list.names]])
}
# print(c("3",date()))
mydata[[ID.list.names]]<-as.character(mydata[[ID.list.names]])
if(!is.na(dateColumnName)) {
mydata[[dateColumnName]]<-as.character(mydata[[dateColumnName]])
}
if(verbose.mode == TRUE) obj.LH$sendLog("\n 1) internal Grouping (1/3):\n Please wait ....... ")
# group the log of the patient in a structure easier to handle
ooo <- groupPatientLogActivity(mydata, ID.list.names)
if(verbose.mode == TRUE) obj.LH$sendLog("\n Done! \n ")
ID.act.group<-ooo$ID.act.group
paziente.da.tenere<-ooo$paziente.da.tenere
# print(c("4",date()))
# Se non ci sono almeno due eventi per ogni paziente, togli il paziente dalla lista
# (e dal data frame originale)
# Se non ci sono almeno due eventi per il paziente, toglilo dalla lista
ID.act.group <- ID.act.group[ paziente.da.tenere ]
mydata <- mydata[ ( mydata[[IDName]] %in% paziente.da.tenere ), ]
# if(verbose.mode == TRUE) cat("\n 2) Ordering date:\n")
if(verbose.mode == TRUE) obj.LH$sendLog(" 2) Ordering date (2/3):\n")
# Order the list by the interested date (if exists)
if(!is.na(dateColumnName)) {
if(length(ID.act.group)==0) browser()
ID.act.group<-order.list.by.date(listToBeOrdered = ID.act.group, dateColumnName = dateColumnName, format.column.date = format.column.date)
}
# print(c("5",date()))
# if(verbose.mode == TRUE) cat("\n 3) Building MMatrices and other stuff")
if(verbose.mode == TRUE) obj.LH$sendLog(" 3) Building MMatrices and other stuff (3/3):\n")
# build the MM matrix and other stuff...
res <- obj.dataProcessor$buildMMMatrices.and.other.structures(mydata = mydata,
EVENT.list.names = EVENT.list.names,
EVENTName = EVENTName,
EVENTDateColumnName = param.dateColumnName,
ID.act.group = ID.act.group,
max.char.length.label = param.max.char.length.label,
verbose.mode = param.verbose
)
# print(c("5",date()))
if(res$error == TRUE) {
if(res$errCode == 1) {obj.LH$sendLog( "event '' (BLANK) detected, please check the file\n" ,"ERR"); return()}
if(res$errCode == 2) {obj.LH$sendLog( c("an event has a label with a length greter than ",param.max.char.length.label," chars...\n") ,"ERR"); return()}
if(res$errCode == 3) {obj.LH$sendLog( "at least an event has an invalid char in the label (',$,\")\n" ,"ERR"); return()}
}
if( sum( is.na(mydata[[dateColumnName]]) ) > 0 ) { obj.LH$sendLog( c("at least one date is set to NA, please check loaded data and data format! (patients: ",paste( mydata[which(is.na(mydata[[dateColumnName]])),IDName] ,collapse = ','),") \n") ,"ERR"); return()}
# res<-buildMMMatrices.and.other.structures(mydata = mydata,
# EVENT.list.names = EVENT.list.names,
# EVENTName = EVENTName,
# ID.act.group = ID.act.group)
#populate the internal attributes
# browser()
arrayAssociativo<<-res$arrayAssociativo
footPrint<<-res$footPrint
MMatrix<<-res$MMatrix
pat.process<<-res$pat.process
wordSequence.raw<<-res$wordSequence.raw
MM.mean.time<<-res$MM.mean.time
MM.mean.outflow.time<<-res$MM.mean.outflow.time
MM.density.list<<-res$MM.density.list
MM.den.list.high.det <<- res$MM.den.list.high.det
if(save.memory == FALSE) original.CSV <<- mydata
}
#=================================================================================
# load.csv
#=================================================================================
load.csv.GUI<-function( fileName ) {
fileName <- getAbsolutePath(pathname = fileName)
# prepara un env che poi verrà distrutto
.GlobalEnv$pMineR.IO.shiny.dataLoader.list <- list( "nomeDelFile" = fileName )
on.exit(rm(pMineR.IO.shiny.dataLoader.list, envir=.GlobalEnv))
# Lancia la APP
runApp(appDir = system.file("shiny-gui", "dataLoader.load.csv", package = "pMineR"))
# carica il CSV
if(pMineR.IO.shiny.dataLoader.list$esito == "carica") {
load.csv(nomeFile = fileName, IDName=pMineR.IO.shiny.dataLoader.list$IDColumnName,
EVENTName = pMineR.IO.shiny.dataLoader.list$eventColumnName,
dateColumnName = pMineR.IO.shiny.dataLoader.list$dateColumnName,
sep = pMineR.IO.shiny.dataLoader.list$sep, format.column.date = pMineR.IO.shiny.dataLoader.list$formatoData,
convertUTF = pMineR.IO.shiny.dataLoader.list$UTF8ForceConversion,
suppress.invalid.date = pMineR.IO.shiny.dataLoader.list$badDateSuppressing )
}
}
load.csv<-function( nomeFile, IDName, EVENTName, quote="\"",sep = ",", dateColumnName=NA,
format.column.date="%d/%m/%Y %H:%M:%S",
convertUTF = TRUE, suppress.invalid.date = TRUE) {
# load the file
if(!file.exists(nomeFile)) { obj.LH$sendLog(c( "'",nomeFile,"' does not exist!\n" ),"ERR"); return() }
mydata = read.table(file=nomeFile,sep = sep,header = T,quote=quote)
if(length(mydata)==0) { obj.LH$sendLog(c( "'",nomeFile,"' seems to be empty....\n" ),"ERR"); return() }
if(dim(mydata)[2]==1) { obj.LH$sendLog(c( "'",nomeFile,"' seems to have only one column... check the separator!\n" ),"ERR"); return() }
# Now "load" the data.frame
load.data.frame( mydata = mydata, IDName = IDName, EVENTName = EVENTName,
dateColumnName = dateColumnName , format.column.date = format.column.date,
convertUTF = convertUTF, suppress.invalid.date = suppress.invalid.date)
}
#=================================================================================
# plotTimeline
#=================================================================================
plot.Timeline<-function( patID , table.format.date="%d/%m/%Y",output.format.date = "%d/%m/%Y",cex.axis = 0.6, cex.text = 0.7) {
matrice <- cbind( pat.process[[ as.character(patID) ]][[param.dateColumnName]],
pat.process[[ as.character(patID) ]][[param.EVENTName]])
# vedi stessa cosa in order.list.by.date
newdate <- strptime(as.character(matrice[,1]), input.format.date)
matrice[,1] <- format(newdate, table.format.date)
# plotTimeline(eventTable = matrice, output.format.date = output.format.date, cex.axis = cex.axis,
# cex.text = cex.text )
plotTimeline(eventTable = matrice, table.format.date = table.format.date, output.format.date = output.format.date,
cex.axis = cex.axis,
cex.text = cex.text )
}
#=================================================================================
# plot.time.probability
#=================================================================================
plot.transition.time.probability<-function( from.state , to.state , col.cum='red' ,
col.dens = 'BLUE',
prob.cumulative = TRUE, prob.density = TRUE,
plotIT = TRUE, returnValues=FALSE) {
# browser()
ppp <- MM.den.list.high.det[[ from.state ]][[ to.state ]]
density.pp <- density(ppp,from = 0)
# delta.x <- density(ppp)$x[ 2 ] - density(ppp)$x[ 1 ]
delta.x <- density.pp$x[ 2 ] - density.pp$x[ 1 ]
# min.cum.sum = cumsum(density(ppp)$y)
min.cum.sum = cumsum(density.pp$y)
min.cum.sum <- min.cum.sum * delta.x
# normalizza a '1'
min.cum.sum <- (1/max(min.cum.sum)) * min.cum.sum
if( plotIT == TRUE ) {
main <- paste( c("Time-Transition Probability\n(",from.state," => ",to.state,")") ,collapse='')
# plot(0,0,ylab='Prob.',xlab='Time (mins)',xlim = range(density(ppp)$x),ylim=c(0,1),col='white',main=main)
plot(0,0,ylab='Prob.',xlab='Time (mins)',xlim = range(density.pp$x),ylim=c(0,1),col='white',main=main)
# points(y = min.cum.sum, x = density(ppp)$x,type='l',col = col.cum)
points(y = min.cum.sum, x = density.pp$x,type='l',col = col.cum)
par(new=TRUE)
# plot(y = density(ppp)$y, x = density(ppp)$x,type='l', axes = FALSE, bty = "n", xlab = "", ylab = "", col = col.dens)
plot(y = density.pp$y, x = density.pp$x,type='l', axes = FALSE, bty = "n", xlab = "", ylab = "", col = col.dens)
axis(4)
}
if(returnValues == TRUE) {
return(list(
# "x" = density(ppp)$x,
# "density" = density(ppp)$y,
"x" = density.pp$x,
"density" = density.pp$y,
"cumulative" = min.cum.sum
))
}
}
old.plot.transition.time.probability<-function( from.state , to.state , col.cum='red' ,
col.dens = 'green',
prob.cumulative = TRUE, prob.density = TRUE,
plotIT = TRUE, returnValues=FALSE) {
# MM.den.list.high.det
ppp <- MM.den.list.high.det[[ from.state ]][[ to.state ]]
delta.x <- density(ppp)$x[ 2 ] - density(ppp)$x[ 1 ]
min.cum.sum = cumsum(density(ppp)$y)
min.cum.sum <- min.cum.sum * delta.x
main <- paste( c("Time-Transition Probability\n(",from.state," => ",to.state,")") ,collapse='')
plot(0,0,ylab='Prob.',xlab='Time',xlim = range(density(ppp)$x),ylim=c(0,1),col='white',main=main)
if(prob.cumulative==TRUE) points(y = min.cum.sum, x = density(ppp)$x,type='l',col = col.cum)
if(prob.density==TRUE) points(y = density(ppp)$y, x = density(ppp)$x,type='l',col = col.dens)
# plot(y = min.cum.sum, x = density(ppp)$x,main= main,xlab='time',ylab='Prob',type='l',col = col)
}
#=================================================================================
# loader
#=================================================================================
getData<-function( ) {
# MMatrix.perc
MM<-MMatrix;
for( i in seq( 1 , nrow(MM)) ) { if(sum(MM[i,])>0) {MM[i,]<-MM[i,]/sum(MM[i,]);} }
MMatrix.perc<-MM
# MMatrix.perc.noLoop
MM<-MMatrix;
diag(MM)<-0;
for( i in seq( 1 , nrow(MM)) ) { if(sum(MM[i,])>0) {MM[i,]<-MM[i,]/sum(MM[i,]);} }
MMatrix.perc.noLoop<-MM
return(list(
"arrayAssociativo"=arrayAssociativo,
"footPrint"=footPrint,
"MMatrix"=MMatrix,
"pat.process"=pat.process,
"MMatrix.perc"=MMatrix.perc,
"MMatrix.perc.noLoop"=MMatrix.perc.noLoop,
"wordSequence.raw"=wordSequence.raw,
"MM.mean.time"=MM.mean.time,
"MM.density.list"=MM.density.list,
"MM.den.list.high.det"=MM.den.list.high.det,
"MM.mean.outflow.time"=MM.mean.outflow.time,
"original.CSV"=original.CSV,
"csv.column.names" = param.column.names,
"csv.IDName"=param.IDName,
"csv.EVENTName"=param.EVENTName,
"csv.dateColumnName"=param.dateColumnName,
"csv.date.format"=input.format.date,
"csv.max.pMineR.internal.ID.Evt"=max.pMineR.internal.ID.Evt
))
}
getClass<-function(){
return(list(
"class"="dataLoader",
"obj.ID"=global.personal.ID
))
}
#=================================================================================
# costructor
#=================================================================================
costructor<-function( verboseMode , max.char.length.label, saveMemory ) {
arrayAssociativo<<-''
footPrint<<-''
MMatrix<<-''
pat.process<<-''
wordSequence.raw<<-''
MM.mean.time<<-''
MM.mean.outflow.time<<-''
MM.density.list<<-''
MM.den.list.high.det<<-''
list.dictionary<<-list()
list.dict.column.event.name<<-list()
input.format.date<<-''
max.pMineR.internal.ID.Evt<<-0
# Not true data, but useful anyway
param.IDName<<-''
param.EVENTName<<-''
param.dateColumnName<<-''
param.verbose<<-verbose.mode
param.column.names<<-''
param.max.char.length.label<<-max.char.length.label
param.save.memory<<- saveMemory
original.CSV <<- ''
obj.LH<<-logHandler()
global.personal.ID<<-paste( c(as.character(runif(1,1,100000)),as.character(runif(1,1,100000)),as.character(runif(1,1,100000))), collapse = '' )
# print(timesTwo( 3.2 ))
}
costructor( verboseMode = verbose.mode, max.char.length.label = max.char.length.label, saveMemory = save.memory )
#=================================================================================
return(list(
"load.csv"=load.csv,
"load.csv.GUI"=load.csv.GUI,
"load.data.frame"=load.data.frame,
"getData"=getData,
"applyFilter"=applyFilter,
# "removeEvents"=removeEvents,
# "keepOnlyEvents"=keepOnlyEvents,
"addDictionary"=addDictionary,
"addDataDescription"=addDataDescription,
"getTranslation"=getTranslation,
"plot.Timeline"=plot.Timeline,
"plot.transition.time.probability"=plot.transition.time.probability,
"getClass"=getClass
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.