#'@title Computes the duplicity probability for each device.
#'
#'@description Computes the duplicity probability for each device using three
#'different methods: pairs, 1-to-1 and trajectory. The theory behind these
#'methods is described in detail in \href{https://webgate.ec.europa.eu/fpfis/mwikis/essnetbigdata/images/f/fb/WPI_Deliverable_I3_A_proposed_production_framework_with_mobile_network_data_2020_05_31_draft.pdf}{WPI
#' Deliverable 3} and in the paper \emph{An end-to-end statistical process
#' with mobile network data for Official Statistics}.
#'
#'@param method The method used to compute the duplicity probability. It could
#'have one of the following values: "pairs", "1to1", "trajectory".
#'
#'@param gridFileName The name of the file with the grid parameters. This file
#'could be the one generated by the simulation software or can be created with
#'any text editor. The grid file generated by the simulation software has
#'the following columns: \code{Origin X, Origin Y, X Tile Dim, Y Tile Dim, No Tiles X, No Tiles Y}.
#'We are interested only in the number of rows and columns and the tile size on
#'OX and OY axes. Therefore, the file provided as input to this function should
#'have at least the following 4 columns: \code{No Tiles X} ,\code{No Tiles Y},
#'\code{X Tile Dim} and \code{Y Tile Dim}.
#'
#'@param eventsFileName The name of the file with the network events to be used.
#'Depending on the parameter \code{simulatedData} it could be a .csv file coming
#'from the simulation software or from a real MNO. In case the file comes from
#'the simulation software it contains following columns: \code{time, antennaID, eventCode, deviceID,
#' x, y, tile} Only the first 4 columns are used, the rest are ignored.
#'
#'@param signalFileName The name of the .csv file that contains the signal
#'strength/quality for each tile in the grid. Depending on the parameter
#'\code{simulatedData} it could be a .csv file coming from the simulation
#'software or from a real MNO. In case the file comes from the simulation
#'software the data are organized as a matrix with the number of rows equals to
#'the number of antennas and the the following columns: \code{Antenna ID, Tile 0, Tile 1, ... Tile
#'(N-1)}. On the first column there are the antenna IDs and on the rest of the
#'columns the corresponding signal strength/quality for each tile in the grid.
#'
#'@param cellsFileName The name of the file where the coverage areas of antennas
#'are to be found. It is a should be a .csv file with two values on each row:
#'the antenna ID and a WKT string representing a polygon (i.e. it should start
#'with the word POLYGON) which is the coverage area of the corresponding antenna.
#'This area is also called the antenna cell.
#'
#'@param simulatedData If TRUE the input data are provided by the simulation
#'software, otherwise real data is used.
#'
#'@param simulationFileName The name of the file used to define a simulation
#'scenario. It is the file that was provided as input for the simulation software.
#'This file is required only if simulatedData is TRUE.
#'
#'@param netParams This parameter is required if simulatedData is FALSE, i.e.
#'the \pkg{deduplication} package uses real mobile network data. In this case,
#'\code{netParam} is a list with two elements: \code{conn_threshold} and
#'\code{prob_sec_mobile_phone}. \code{conn_threshold} is the minimum value of
#'the signal strength/quality that can be used to connect a mobile device to an
#'antenna. If the signal in a region is below this value the region is
#'considered out of the coverage area. \code{prob_sec_mobile_phone} is the
#'probability of a person to have two mobile devices.
#'
#'@param path The path where the files with the posterior location probabilities
#'for each device are to be found. This parameter is needed only if the
#'"trajectory" method is used.
#'
#'@param gamma This value is used only for the "trajectory" method and is the
#'factor used in the compariosn between the mode of Delta X and Delta Y and the
#'dispersion radius. The default value is 0.5.
#'
#'@param aprioriProbModel This parameter is used to initialize the apriori
#'probabilities for the HMM model for each device. The default value is NULL
#'which means that by default the models are initialized with the steady state.
#'
#'@param aprioriJointModel This parameter is used to initialize the apriori
#'probabilities for the joint HMM models for each pair of two devices. The
#'default value is NULL which means that by default the models are initialized
#'with the steady state.
#'
#'@param lambda This parameter is used in combination with the "1-to-1" method.
#'If it is NULL (which is the default value) the computations follow the
#'description given in \href{https://webgate.ec.europa.eu/fpfis/mwikis/essnetbigdata/images/f/fb/WPI_Deliverable_I3_A_proposed_production_framework_with_mobile_network_data_2020_05_31_draft.pdf}{WPI
#' Deliverable 3}, otherwise the computations proceed as they are described in the
#' \emph{An end-to-end statistical process with mobile network data for Official Statistics} paper.
#'
#'@param handoverType The handover mechanism used by the mobile network. It could
#'have two values: "strength" or "quality". The default value is "strength".
#'This parameter is used to compute the emission probabilities for the HMM
#'models using the values from the signal file. If this file contains the signal
#'strength then the \code{handoverType} should be set to "strength" and if the
#'file contains the values of the signal quality then the parameter should be set to
#'"quality".
#'
#'@param emissionModel This parameter dictates how the emission probabilities
#'for the HMM models are computed. It can have two values: "RSS" and "SDM".
#'Normally, the emission probabilities are computed using the signal values from
#'the signal file assuming that if the \code{handoverType} is set to "strength"
#'then the signal file contains the strength values and if the \code{handoverType}
#'is set to "quality" the signal file contains the quality of the signal. For
#'demonstrative purposes the package supports unusual combinations. If \code{handoverType}
#' is set to "strength", the signal file contains the signal strength but the
#' \code{emissionModel} is set to "SDM" the signal strength values are transformed
#' to signal quality and then the computation of the emission probabilities uses
#' these transformed values. If \code{handoverType} is set to "quality", the signal
#' file contains the signal quality but the \code{emissionModel} is set to "RSS",
#' the signal quality values are transformed to signal strength and then the
#' computation of the emission probablities uses these transformed values.
#'
#'@param antennaFileName The name of the xml file containing the technical
#'parameters of the antennas needed to transform the signal quality into signal
#'strength and the other way around. This is an input file of the simulation
#'software. An example file is included in this package. The default value is
#'NULL because this file is only needed to make unusual combinations between
#'\code{handoverType} and the way the emission probabilities are computed.
#'
#'@param prefix The file name prefix for the files with posterior location
#'probabilities.
#'
#'@return a data.table object with two columns: \code{deviceID} and \code{dupP}.
#'On the first column there are deviceIDs and on the second column the
#'corresponding duplicity probability, i.e. the probability that a device is in
#'a 2-to-1 correspondence with its holder.
#'
#'
#'@export
computeDuplicity <- function(method, gridFileName, eventsFileName, signalFileName, antennaCellsFileName = NULL, simulatedData = TRUE, simulationFileName, netParams = NULL, path = NULL, gamma = 0.5, aprioriProbModel = NULL, aprioriProbJointModel = NULL, lambda = NULL, handoverType = 'strength', emissionModel = NULL, antennaFileName = NULL, prefix = NULL) {
out_duplicity <- NULL
tryCatch ({
if(method != 'trajectory' & method != "1to1" & method != 'pairs')
stop(paste(method, " method unknown!"))
if(!file.exists(gridFileName))
stop(paste0(gridFileName, " doesn't exist"))
if(!file.exists(eventsFileName))
stop(paste0(eventsFileName, " doesn't exist"))
if(!file.exists(signalFileName))
stop(paste0(signalFileName, " doesn't exist"))
if(method == "pairs")
if(!file.exists(antennaCellsFileName))
stop(paste0(antennaCellsFileName, " doesn't exist. If you use \"pairs\" method you should provide a file with antenna cells."))
if(simulatedData == TRUE)
if (is.null(simulationFileName) | !file.exists(simulationFileName))
stop(paste0(simulationFileName, " dooesn't exist. If you use simulated data you should provide the simulation input file."))
if(simulatedData == FALSE & is.null(netParams))
stop("In case of using real data you should provide a list with two params: the minimum value of the signal detectable by mobile devices and
the probability of a person to have two mobile devices")
gridParams <-readGridParams(gridFileName)
events <- readEvents(eventsFileName)
if(simulatedData == TRUE)
simParams <-readSimulationParams(simulationFileName)
else
simParams <- netParams
devices <- getDeviceIDs(events)
connections <- getConnections(events)
emissionProbs <- getEmissionProbs(gridParams$nrow, gridParams$ncol, signalFileName, simParams$conn_threshold, handoverType, simulatedData = TRUE, emissionModel, antennaFileName)
jointEmissionProbs <- getEmissionProbsJointModel(emissionProbs)
if(is.null(aprioriProbModel)) {
model <- getGenericModel(gridParams$nrow, gridParams$ncol, emissionProbs)
}
else {
model <- getGenericModel(gridParams$nrow, gridParams$ncol, emissionProbs, initSteady = FALSE, aprioriProb = aprioriProbModel)
}
if(is.null(aprioriProbJointModel)) {
modelJ <- getJointModel(gridParams$nrow, gridParams$ncol, jointEmissionProbs)
} else {
modelJ <- getJointModel(gridParams$nrow, gridParams$ncol, jointEmissionProbs, initSteady = FALSE, aprioriJointProb = aprioriProbJointModel)
}
if(method == "pairs" | method == "trajectory") {
coverarea <- readCells(antennaCellsFileName)
antennaNeigh <- antennaNeighbours(coverarea)
P1a <- aprioriDuplicityProb(simParams$prob_sec_mobile_phone, length(devices))
pairs4dup<-computePairs(connections, length(devices), oneToOne = FALSE, antennaNeighbors = antennaNeigh)
if (method == "pairs") {
ll <- fitModels(length(devices), model,connections)
out_duplicity <- computeDuplicityBayesian(method, devices, pairs4dup, modelJ, ll, P1 = P1a)
}
else {
T<-sort(unique(events[,1][[1]]))
out_duplicity <-computeDuplicityTrajectory(path, prefix, devices, gridParams, pairs4dup, P1 = P1a , T, gamma = gamma)
}
}
else if(method == "1to1"){
pairs4dup<-computePairs(connections, length(devices), oneToOne = TRUE)
ll <- fitModels(length(devices), model,connections)
if(!is.null(lambda)) {
out_duplicity <- computeDuplicityBayesian(method, devices, pairs4dup, modelJ, ll, P1 = NULL, Pii = NULL, init = TRUE, lambda = lambda)
}
else {
Piia <- aprioriOneDeviceProb(simParams$prob_sec_mobile_phone, length(devices))
out_duplicity <- computeDuplicityBayesian(method, devices, pairs4dup, modelJ, ll, P1 = NULL, Pii = Piia)
}
}
}, error = function(err){
print(err)
rm(list=setdiff(ls(), "out_duplicity"))
},
finally = {
rm(list=setdiff(ls(), "out_duplicity"))
})
return(out_duplicity)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.