R/anonymize.R

Defines functions anonymize.ego anonymize.smtrx anonymize.default anonymize

Documented in anonymize anonymize.ego anonymize.smtrx

#' @title Replace badge identifiers
#'
#' @description Sociometric data contains the numeric Badge identifier. This function
#'  "anonymizes" the exported identifiers (which correspond to the unique Badge IDs) by
#'  replacing them with alternative IDs. Replacement values can be provided or will be
#'  generated by the function.
#'
#' @param x A data frame with one or several columns whose ids are to be anonymized.
#'  Usually these columns are "Badge.ID" and "Other.ID".
#' @param ids Vector of values to be replaced. If \code{NULL} (default), a list of unique
#'  values is gathered automatically across all \code{cols}.
#' @param replv Vector of replacement values. The default value \code{NULL} generates
#'  replacement values as the sequence 1:n (n = number of unique elements).
#' @param cols Vector of columns (names or indices) over which replacement will happen.
#' @param decreasing Logical. In case \code{ids} is \code{NULL}, indicates how the ids retrieved
#'  from the indicated columns are ordered. If a list of \code{ids} is provided, the ordering
#'  is ignored. See Details!
#'
#' @return Data frame with replaced values in the indicated columns.
#'
#' @details In order to replace values, two behaviors are possible: a) the mapping from original
#'  values to replacement values is specified by vectors of equal length for \code{ids} and
#'  \code{replv}, where the first element of \code{ids} is replaced with the first element of
#'  \code{replv} and so forth. b) If \code{ids=NULL}, the function retrieves a list of unique IDs
#'  over the specified columns. The list of unique values is generated over the combined list of
#'  all columns and not on a per column basis. Note that this list of unique IDs can be ordered
#'  differently, which affects which replacement values are assigned! For the sorting of the
#'  unique IDs see \code{\link{unique_ids}}.
#'  The default value \code{decreasing=FALSE} makes sure that unique IDs are sorted in ascending
#'  order, which corresponds to the order of the automatically generated replacement values
#'  1..n in case \code{replv=NULL}.
#'
#' @examples
#' x <- data.frame(a=c(1:15), b=c(11:25), c=sample(25:30, size=15, replace=TRUE))
#'
#' #replace all 2 with "AA", all 13 with -99 and all 25 with "--" over all three columns.
#' anonymize(x, ids=c(2,13,25), replv=c("AA",-99, "--"), cols=c("a", "b", "c"))
#'
#' #replaces sequence of 11:25 with 15:1 in column "b"
#' anonymize(x, cols=c("b"), decreasing=TRUE)
#'
#' #column "c" repeats values drawn from 25:30. The unique values present are
#' #replaced in increasing order with 1..n
#' anonymize(x, cols=c("c"), decreasing=FALSE)
#'
#' #replaces in inversed (decreasing) order n..1
#' anonymize(x, cols=c("c"), decreasing=TRUE)
#'
#' #combine two columns and inverse order
#' anonymize(x, cols=c("b", "c"), decreasing=TRUE)
#'
#'
#' @export anonymize
#'
anonymize <- function(x, ids = NULL, replv = NULL, cols = NULL, decreasing = FALSE) {
  # S3 generic: dispatches on the class of `x`.
  UseMethod("anonymize")
}



#' @export
anonymize.default <- function(x, ids = NULL, replv = NULL, cols = NULL, decreasing = FALSE) {
  # Default method: in every column listed in `cols`, each cell equal to
  # ids[i] is replaced by replv[i]. Returns `x` with its original class.
  #
  #   x           object holding the columns to anonymize
  #   ids         values to replace; when NULL they are obtained from
  #               unique_ids(x, cols, decreasing)
  #   replv       replacement values; when NULL the sequence 1..length(ids)
  #   cols        column names or indices to process
  #   decreasing  only forwarded to unique_ids() when `ids` is NULL
  #
  # Stops when there are no ids, when `ids` and `replv` differ in length,
  # contain NA, or are not vectors.

  cls <- class(x)

  # generate list of ids to be anonymized
  if (is.null(ids)) {
    ids <- unique_ids(x = x, cols = cols, decreasing = decreasing)
  }

  if (length(ids) == 0) {
    stop("Nothing to anonymize in columns ", paste(cols, collapse = ", "))
  }

  # generate replacement values if none provided
  if (is.null(replv)) {
    replv <- seq_along(ids)
  }

  # matching and replacement values should have same length
  if (length(ids) != length(replv)) {
    stop("Replacement mismatch. Vectors of different size!")
  }

  if (anyNA(ids) || anyNA(replv)) {
    stop("NA values in anonymize not allowed!")
  }

  if (!is.vector(ids) || !is.vector(replv)) {
    stop("Match/replace values need to be a vector")
  }

  for (col in cols) {
    values <- x[[col]]

    # Look every cell up against the ORIGINAL values in one pass. Replacing
    # id by id in place would re-replace cells whenever a replacement value
    # is itself one of the later ids (e.g. 1 -> 2 followed by 2 -> 3).
    pos <- match(values, ids)

    # Cells without a matching id (including NA cells) are left untouched.
    hit <- !is.na(pos)
    if (any(hit)) {
      values[hit] <- replv[pos[hit]]
      x[[col]] <- values
    }
  }

  class(x) <- cls

  x
}



#' @describeIn anonymize Anonymize sociometrics data with two columns "Badge.ID" and "Other.ID" by default
#' @export
anonymize.smtrx <- function(x, ids = NULL, replv = NULL, cols = c("Badge.ID", "Other.ID"),
                            decreasing = FALSE) {
  # Delegates to the next method in the dispatch chain. The arguments are
  # passed by name so that this method's own defaults (notably `cols`) reach
  # the next method even when the caller did not supply them.
  NextMethod("anonymize", x, ids = ids, replv = replv, cols = cols,
             decreasing = decreasing)
}


#' @describeIn anonymize Anonymize sociometrics data frame with single column "Badge.ID" by default.
#' @export
anonymize.ego <- function(x, ids = NULL, replv = NULL, cols = c("Badge.ID"),
                          decreasing = FALSE) {
  # Delegates to the next method in the dispatch chain. The arguments are
  # passed by name so that this method's own defaults (notably `cols`) reach
  # the next method even when the caller did not supply them.
  NextMethod("anonymize", x, ids = ids, replv = replv, cols = cols,
             decreasing = decreasing)
}
jmueller17/sociometrics documentation built on March 20, 2024, 1:04 a.m.