R/singularize.R

Defines functions singularize

Documented in singularize

#' Normalize anomalies metrics and (eventually) stack them
#' 
#' \code{singularize} derives normalized/standardized versions of different weirds contained in a stranger object so that they are directly comparable and then propose various aggregated measures (stacking).
#' 
#'@param strangerObject An object build with \code{\link{stranger}} or \code{strangest}
#'@param methods character vector of standardization methods: norm (normalize) and rank (ranking version). By default, both methods are used.
#'@param stacks character vectors of stacked versions. Note that some methods require more than 2 weirds metrics to be used
#'@param prefix default prefix used to name generated new metrics
#'@param \dots Additional parameters - Currently not used.
#'@aliases singular
singularize <- function(strangerObject,
                        methods=c("norm","rank"),
                        stacks=c("max","avg","min","damavg", "pruavg"),
                        prefix="N_anom",...)
{

  assertthat::assert_that(assertthat::is.string(prefix),assertthat::not_empty(prefix),msg="prefix must be an nonempty string.")
  assertthat::assert_that(inherits(strangerObject,"stranger"),msg="First argument must be a stranger Object.")

  methods <- match.arg(methods,c("rank","norm"),several.ok = TRUE)
  staks <- match.arg(stacks,c("max","avg","min","damavg", "pruavg"),several.ok=TRUE)


  meta <- attr(strangerObject,"meta")

  # Will prepare all stack, but only for all specified  methods:  default both  rank and  norm
  # binarization stacking method is chosen with the next method that binarize/flags anomalies (get_strangest)

  # start from an empty data.frame having same ID than our source data
  id <- select(strangerObject,.id)

  out <- id
  strangerObject <- select(strangerObject,-.id)

  ncomp <- ncol(strangerObject)



  buildStack <- function(df,stacks,ids=id,method="norm"){
    # note: method has to be defined in calling environment
    sname <- function(stack){paste(prefix,method,stack,sep="_")}
    ## build stacked derivatives
    if ("max" %in% stacks) ids[[sname("max")]] <- apply(df,1,max)
    if ("min" %in% stacks) ids[[sname("min")]] <- apply(df,1,min)
    if ("avg" %in% stacks) ids[[sname("avg")]] <- apply(df,1,mean)
    if ("damavg" %in% stacks) {
      if (method=="norm"){
        tmp <- apply(df,1,
                     function(x){
                       x[x <= 0] <- -10
                       x[x > 0] <- log(x[x > 0])
                       mean(x)})
        ids[[sname("damavg")]] <- (tmp - min(tmp))/(max(tmp)-min(tmp))
      } else {
        ids[[sname("damavg")]] <- apply(df,1,function(vec) mean(log(vec)))
      }

    }
    if ("pruavg" %in% stacks) {
      if (ncol(df)<2) {
        warning("Not enough columns to derive pruned Averages -- ignoring this derivation.")
      } else {
        ids[[sname("pruavg")]] <- apply(df,1,function(x) ifelse(min(x)==max(x),min(x),mean(x[x!=max(x)])))
      }
    }
    return(select(ids,-.id))
  }


  if ("rank" %in% methods){
    method <- "rank"

    ranks <- as.data.table(lapply(1:ncol(strangerObject),function(i){
      frankv(strangerObject[[i]], order = attr(strangerObject[[i]],"sort"))
    }))
    colnames(ranks) <- paste(prefix, method,names(strangerObject),sep="_")

    out <- cbind(out,ranks)

    # stacked versions
    if (ncomp > 1) {
      out <- cbind(out,buildStack(ranks, stacks,method="rank"))
    } else out <- cbind(out,buildStack(ranks, "avg",method="rank"))

  }


  if ("norm" %in% methods){
    method <- "norm"
    out.norm <- normalize(strangerObject)

    colnames(out.norm) <- paste(prefix,"norm",colnames(strangerObject),sep="_")

    out <- cbind(out,out.norm)


    # stacked versions
    # stacked versions
    if (ncomp > 1) {
      out <- cbind(out,buildStack(out.norm, stacks,method="norm"))
    } else out <- cbind(out,buildStack(out.norm, "avg",method="norm"))



  }
  out <- cbind(strangerObject,out)
  class(out) <- c("singular","stranger",class(out))
  attr(out,"meta") <- meta
  attr(out,"methods") <- methods
  attr(out,"stacks") <- stacks
  return(out)

}

Try the stranger package in your browser

Any scripts or data that you put into this service are public.

stranger documentation built on March 18, 2018, 2:01 p.m.