# R/readEvents.R

# Defines functions readEvents

# Documented in readEvents

#' @title Reads the network events file.
#'
#' @description Reads the network events file. This file can come from the
#' network simulator or it can be a file with real mobile network events
#' provided by an MNO. Only simulated data are currently supported; for real
#' MNO data the function emits a message and returns \code{NULL}.
#'
#' @details For simulated data the function: \enumerate{
#'   \item reads the file and drops rows that are exact duplicates;
#'   \item keeps only the events whose \code{eventCode} is 0, 2 or 3;
#'   \item builds \code{obsVar} as \code{antennaID-eventCode};
#'   \item keeps, for every (\code{time}, \code{deviceID}) pair, a single row
#'   carrying the smallest event code recorded for that pair (i.e. 0 is chosen
#'   over 3 and 2 is chosen over 3).
#' }
#'
#' @param eventsFileName The name of the file with the network events to be
#' used. Depending on the parameter \code{simulatedData} it could be a .csv
#' file coming from the simulation software or from a real MNO. In case the
#' file comes from the simulation software it should be a comma-separated file
#' with a header row and the following seven columns, in this order: \cr
#' \code{time, antennaID, eventCode, deviceID, x, y, tile}.
#' All seven columns are read and kept in the returned object.
#'
#' @param simulatedData If TRUE it means that the input data are simulated
#' data, otherwise the data come from a real MNO. Must be a single, non-missing
#' value.
#'
#' @return Returns a data.table object that contains the events generated by
#' the mobile network, with one row per (\code{time}, \code{deviceID}) pair.
#' The returned object has the following columns:
#' \code{time, deviceID, eventCode, antennaID, x, y, tile, obsVar}.
#' \code{obsVar} stands for observed variable and is a concatenation between
#' the antenna ID and the event code, separated by "-". When
#' \code{simulatedData} is not TRUE the function returns \code{NULL}.
#'
#' @import data.table
#' @import stringr
#'
#' @export
readEvents <- function(eventsFileName, simulatedData = TRUE) {
  if (!file.exists(eventsFileName)) {
    stop(eventsFileName, " does not exist!", call. = FALSE)
  }

  # Fail with a readable message instead of the opaque
  # "missing value where TRUE/FALSE needed" raised by `if (NA == TRUE)`.
  if (length(simulatedData) != 1L || is.na(simulatedData)) {
    stop("simulatedData must be a single TRUE or FALSE value.", call. = FALSE)
  }

  if (!(simulatedData == TRUE)) {
    # Status information goes to the message stream, not to stdout.
    message("Read real mobile network events file not implemented yet!")
    return(NULL)
  }

  events.dt <- fread(
    eventsFileName,
    sep = ",",
    stringsAsFactors = FALSE,
    header = TRUE,
    colClasses = c(
      "integer",    # time
      "character",  # antennaID
      "character",  # eventCode
      "character",  # deviceID
      "numeric",    # x
      "numeric",    # y
      "character"   # tile
    )
  )

  # Drop rows that are identical on every column.
  events.dt <- unique(events.dt)

  # Impose the expected column names regardless of the header in the file.
  setnames(
    events.dt,
    c("time", "antennaID", "eventCode", "deviceID", "x", "y", "tile")
  )

  # Only event codes 0, 2 and 3 are kept.
  events.dt <- events.dt[eventCode %in% c("0", "2", "3")]

  # Observed variable: "<antennaID>-<eventCode>".
  events.dt[, obsVar := paste(antennaID, eventCode, sep = "-")]

  # To be sure that for each time and device there is just one event we choose
  # 0 when (0,3) and 2 when (2,3), i.e. the smallest code of the pair.
  # The grouped aggregation already returns a new table, so no copy() is needed.
  minCode.dt <- events.dt[
    ,
    list(eventCode = as.character(min(as.numeric(eventCode)))),
    by = c("time", "deviceID")
  ]

  # Bring back the remaining columns for the selected event code. The merge
  # also orders the result by time, deviceID and eventCode.
  events.dt <- merge(
    minCode.dt,
    events.dt,
    by = names(minCode.dt),
    all.x = TRUE
  )

  # Several rows (e.g. different antennas) may share the selected code;
  # keep the first one only.
  events.dt[!duplicated(events.dt, by = c("time", "deviceID", "eventCode"))]
}
# bogdanoancea/deduplication documentation built on Dec. 2, 2020, 11:22 p.m.