R/fortify.R

Defines functions fortify.anomalies fortify.stranger fortify.singular

Documented in fortify.anomalies fortify.singular fortify.stranger

### fortify
#' Merge stranger/singular objects to a dataset
#'
#' \code{fortify} method for \code{anomalies}, \code{stranger} and \code{singular} objects to enrich data. Usually,
#' this is invoked on source data. For obvious precautions, this is done on an id column
#' as managed by the typical workflow (\code{crazyfy}). In case you enrich data without any
#' specific id, it is just assumed to have those data in the same order than data used in
#' anomaly detection computations -- this behavior may really needs to unwanted results.
#'
#' @param x anomalies objects as generated by a call to \code{\link{get_anomalies}}.
#' @param data data to enrich; if NULL (default), we use the data used during computation
#' @param id character - name of the id column. No need to specify this if you follow the recommended
#' process using either first \code{\link{add_id}} or \code{\link{crazyfy}}.
#' @param colname character - name of the column to be created for the flag (default: flag_anomaly).
#' @param \dots fortify generic parameter -- not used for stranger objects.
#'
#' @rdname fortify_stranger
fortify.anomalies  <- function(x,data=NULL,id=NULL,colname="flag_anomaly",...){
  # if is.null(data) take from attributes/metadata
  # no check here at all -> have to implement (dimension, same rownames...)
  # we just use row.names

  assertthat::assert_that(inherits(x,'anomalies'),msg="x must be an anomaly object.")
  meta <- attr(x,"meta")

  if (is.null(data)){
    if (is.null(meta$data)) stop("data is NULL but can't be derived from x metadata. Is x an anomaly object?")
    data <- meta$data
  }

  if (is.null(id)){
    if (meta$crazymeta$had.id){
      idcol <- meta$crazymeta$id
    } else {
      idcol <- '.id'
    }
    if (!idcol %in% colnames(data)){
      warning(paste("Used source ID",idcol,"not present in provided data. We act as if 1:n was ID (ie. add_id/crazyfy used). This may lead to unexpected results.\n"))
      data[[".id"]] <- 1:nrow(data)
      idcol=".id"
    }
  } else{
    idcol=id
    if (!id %in% colnames(data)) stop(paste("ID",id , "not present in source data.frame"))
  }
  data[[colname]] <- data[[idcol]] %in% x
  attr(data[[colname]],"sort") <- -1 # for plots
 # print(names(meta))
  meta[["anomalyflag"]] <- colname
  attr(data,"meta") <- meta
  class(data) <- c("fortifiedanomaly",class(data))
  return(data)
}


#' @param all.x merge parameter
#' @param all.y merge parameter
#' @rdname fortify_stranger
fortify.stranger <- function(x,data=NULL,id=NULL,all.x=TRUE, all.y=FALSE,...){
  if (is.null(data)) {
    data <- attr(x,"meta")$data
    # reuse mapping table with eventual duplicates to enrich outliers with their eventual  duplicate ID
    crazymeta <- attr(data,"meta")
    if (crazymeta$has.duplicates){
      #<!> To be tested
      # dup.id.mapping
      dup <- crazymeta$duplicated.mapping
      setkey(dup,.id)
      data=dup[data, on=.(.id)]
      data[!is.na(data[,mappedid]),][[".id"]] <- data[!is.na(data[,mappedid]),mappedid]
      data[,mappedid:=NULL]
    }
  }
  meta <- attr(x,"meta")

  if (is.null(id)){
    if (meta$crazymeta$had.id){
      idcol <- meta$crazymeta$id
    } else {
      idcol <- '.id'
    }
    if (!idcol %in% colnames(data)){
      warning(paste("Used source ID",idcol,"not present in provided data. We act as if 1:n was ID (ie. add_id/crazyfy used). This may lead to unexpected results.\n"))
      data[[".id"]] <- 1:nrow(data)
      idcol=".id"
    }
  } else{
    idcol=id
    if (!id %in% colnames(data)) stop(paste("ID",id , "not present in source data.frame"))
  }
  if (nrow(x)!= nrow(data)) warning("data do not have same number of rows that x - there may be unexpected results.")

  vars <- colnames(data)
  vars <- vars[!vars%in% idcol]
  out <- merge(data,x, by=idcol,all.x=all.x,all.y=all.y)
  meta$variables <- vars
  attr(out,"meta") <- meta
  cl <- class(x)
  if (!"data.table"%in% class(data)) cl <- cl[cl!="data.table"]
  class(out) <- c("fortifiedstranger",cl)
  return(out)
}

#' @rdname fortify_stranger
fortify.singular <- function(x,data=NULL,id=NULL,all.x=TRUE, all.y=FALSE,...){
  if (is.null(data)) {
    data <- attr(x,"meta")$data
    crazymeta <- attr(data,"meta")
    if (crazymeta$has.duplicates){
      #<!> To be tested
      # dup.id.mapping
      dup <- crazymeta$duplicated.mapping
      setkey(dup,.id)
      data=dup[data, on=.(.id)]
      data[!is.na(data[,mappedid]),][[".id"]] <- data[!is.na(data[,mappedid]),mappedid]
      data[,mappedid:=NULL]
    }

  }
  meta <- attr(x,"meta")

  if (is.null(id)){
    if (meta$crazymeta$had.id){
      idcol <- meta$crazymeta$id
    } else {
      idcol <- '.id'
    }
    if (!idcol %in% colnames(data)){
      warning(paste("Used source ID",idcol,"not present in provided data. We act as if 1:n was ID (ie. add_id/crazyfy used). This may lead to unexpected results.\n"))
      data[[".id"]] <- 1:nrow(data)
      idcol=".id"
    }
  } else{
    idcol=id
    if (!id %in% colnames(data)) stop(paste("ID",id , "not present in source data.frame"))
  }
  if (nrow(x)!= nrow(data)) warning("data do not have same number of rows that x - there may be unexpected results.")

  vars <- colnames(data)
  vars <- vars[!vars%in% idcol]
  out <- merge(data,x, by=idcol,all.x=all.x,all.y=all.y)
  meta$variables <- vars
  attr(out,"meta") <- meta
  cl <- class(x)
  if (!"data.table"%in% class(data)) cl <- cl[cl!="data.table"]
  class(out) <- c("fortifiedsingular",cl)
  return(out)
}

Try the stranger package in your browser

Any scripts or data that you put into this service are public.

stranger documentation built on March 18, 2018, 2:01 p.m.