R/utils.R

Defines functions prepare_trajectories trnorm cohen round_even colormixer group subsample count_changes points_on_ideal point_to_line create_results extract_dimensions extract_data is_mousetrap_data

#' @importFrom magrittr %>%
magrittr::`%>%`

# Suppress R CMD check note
#' @importFrom lifecycle deprecate_soft

# Check that data is a mousetrap object
is_mousetrap_data <- function(data){

  return(inherits(data,"mousetrap"))

}


# Extract data from mousetrap object
extract_data <- function(data, use) {

  if (is_mousetrap_data(data)){

    extracted <- data[[use]]
    
    if(is.null(extracted)){
      
      # Cover special cases due to the replacement of 
      # mt_spatialize with mt_length_normalize
      if (use == "ln_trajectories" & "sp_trajectories" %in% names(data)){
        warning(
          "Instead of ln_trajectories only sp_trajectories were found. ",
          "These will be used. ",
          "Please note that the mt_spatialize function has been deprecated ",
          "and that the mt_length_normalize function should be used instead."
          )
        return(data[["sp_trajectories"]])
        
      } else if (use == "sp_trajectories" & "ln_trajectories" %in% names(data)){
        warning(
          "Instead of sp_trajectories only ln_trajectories were found. ",
          "These will be used. ",
          "Please note that since the mt_spatialize function has been replaced ",
          "with the mt_length_normalize function, the resulting trajectories ",
          "are now called ln_trajectories instead of sp_trajectories."
        )
        return(data[["ln_trajectories"]])
        
      } else {
        stop("No data called '",use,"' found.")
      }
      
    }
    return(extracted)

  } else if (is.array(data)){

    return(data)
    
  } else {
    
    stop("Data can either be of class mousetrap or array.")
    
  }

}


# Function for extracting dimensions from trajectory array
# that also handles edge case of trajectory array with single trajectory
extract_dimensions <- function(trajectories, dimensions){
  
  trajectories_dim <- dim(trajectories)
  
  # Standard case (no modification required)
  if(trajectories_dim[1] != 1){
    return(trajectories[,,dimensions])
    
  # Edge case of single trajectory
  } else{
    
    # Use drop argument to prevent dropping of dimensions
    trajectories <- trajectories[,,dimensions, drop = FALSE]
    
    # In case only a single dimension is extracted,
    # drop third dimension from trajectory array
    if(length(dimensions) == 1){
      dim(trajectories) <- trajectories_dim[1:2]
    }
    
    return(trajectories)
  }
}


# Function to create results
create_results <- function(data, results, use, save_as, ids=NULL, overwrite=TRUE) {

  # Procedure for trajectories
  if (length(dim(results)) > 2) {
    if (is_mousetrap_data(data)) {
      data[[save_as]] <- results
      return(data)
    } else {
      return(results)
    }


  # Procedure for measures
  } else {
    results <- as.data.frame(results)

    # Extract / set ids
    if (is.null(ids)) {
      if (is.null(rownames(results))) {
        stop("No ids for create_results function provided.")
      } else {
        ids <- as.character(rownames(results))
      }
    } else {
      ids <- as.character(ids)
      rownames(results) <- ids
    }

    # Return results depending on type of data and overwrite setting
    if (is_mousetrap_data(data)){
      if (save_as %in% names(data) & overwrite == FALSE) {
        # check if columns already exist in data[[save_as]]
        if (any(colnames(results)%in%colnames(data[[save_as]]))){
          # if so, remove them and issue warning
          data[[save_as]] <- data[[save_as]][,colnames(data[[save_as]])[!colnames(data[[save_as]])%in%colnames(results)],drop=FALSE]
          warning("Columns of same name already exist and have been replaced")
        }
        # ensure id column is present
        data[[save_as]][,"mt_id"] <- as.character(rownames(data[[save_as]]))
        results[,"mt_id"] <- as.character(rownames(results))
        # merge by rownames
        data[[save_as]] <- dplyr::inner_join(data[[save_as]], results, by="mt_id")
        # set rownames again
        rownames(data[[save_as]]) <- data[[save_as]][,"mt_id"]
        # sort data
        data[[save_as]] <- data[[save_as]][ids,]
      } else {
        data[[save_as]] <- cbind(mt_id=ids, results)
      }
      # ensure rownames are characters
      data[[save_as]][,"mt_id"] <- as.character(data[[save_as]][,"mt_id"])
      return(data)
      
    } else {
      return(cbind(mt_id=ids, results))
    }
  }
}


# Function to determine the point on the line between P1 and P2
# that forms a line with P0 so that it is orthogonal to P1-P2.
# For details regarding the formula, see
# http://paulbourke.net/geometry/pointlineplane/
#
# The function expects P0 to be a matrix of points.
point_to_line <- function(P0, P1, P2){

  u <- ( (P0[,1]-P1[1]) * (P2[1]-P1[1]) +
           (P0[,2]-P1[2]) * (P2[2]-P1[2]) ) /
    ( (P2[1]-P1[1])^2 + (P2[2]-P1[2])^2 )

  P <- matrix(c(
    P1[1] + u * (P2[1] - P1[1]),
    P1[2] + u * (P2[2] - P1[2])),
    ncol = 2, byrow = FALSE
  )

  colnames(P) <- colnames(P0)

  return(P)
}


# Function to determine points on the straight line connecting the start and end
# points that result from an orthogonal projection of the individual points on
# the curve
points_on_ideal <- function(points, start=NULL, end=NULL) {

  # Fill start and end values if otherwise unspecified
  if (is.null(start)) {
    start <- points[1,]
  }
  if (is.null(end)) {
    end <- points[nrow(points),]
  }

  if (all(start == end)) {
    # If start and end points are identical,
    # no projection can be computed.
    # Therefore, we return the start/end point
    # as a fallback result.
    warning(
      "Start and end point identical in trajectory. ",
      "This might lead to strange results for some measures (e.g., MAD)."
    )
    result <- points
    result[,1] <- start[1]
    result[,2] <- start[2]
  } else {
    # compute the projection onto the idealized straight line for all points
    result <- point_to_line(P0 = points, P1 = start, P2 = end)
  }

  return(result)
}


# Function to calculate the number of flips
count_changes <- function(pos, threshold=0, zero_threshold=0) {

  # Calculate differences in positions between subsequent steps
  # (pos is a one-dimensional vector)
  changes <- diff(pos, lag=1)

  # Only keep logs with changes (above zero_threshold)
  changes <- changes[abs(changes) > zero_threshold]

  # Initialize variables
  cum_changes <- c() # vector of accumulated deltas
  cum_delta <- changes[1] # current accumulated delta

  # Iterate over the changes, and summarize
  # those with the same sign by generating the sum.
  # When the sign changes, a new value is added to
  # the cum_changes vector and the cumulative sum
  # of consecutive changes is reset.
  for (delta in changes[-1]) {

    # check if previous (accumulated) and current delta have the same sign
    if (sign(cum_delta) == sign(delta)) {

      # if so, accumulate deltas
      cum_delta <- delta + cum_delta
    } else {
      # if not, save accumulated delta
      cum_changes <- c(cum_changes, cum_delta)

      # reset cum_delta to current delta
      cum_delta <- delta
    }
  }

  # save last cum_delta
  cum_changes <- c(cum_changes, cum_delta)

  # Count changes in direction/sign

  # If there is no threshold, simply look at the number of the accumulated deltas
  if (threshold == 0) {
    n <- length(cum_changes) - 1

  } else {
    # If a threshold is set,
    # only keep changes above threshold
    cum_changes <- cum_changes[abs(cum_changes) > abs(threshold)]

    # Count changes in sign
    # (the diff converts changes in sign into +-1s or 0s for no changes,
    # and their absolute values are added up along the vector)
    n <- sum(abs(diff(cum_changes > 0)))
  }

  return(n)
}





#subsample
subsample = function(data, use = c('trajectories'), n, seed = 1){
  if(is.array(data) | is.data.frame(data) | is.matrix(data)){
    total_cases = dim(data)[1]
    if(n <= total_cases){
      set.seed(seed)
      select = sample(1:total_cases,n)
      set.seed(NULL)
      if(length(dim(data)) == 2){
        data = data[select,]
      }
      if(length(dim(data)) == 3){
        data = data[select,,]
      }
      if(length(dim(data)) == 1 | length(dim(data)) > 3){
        stop('Unexpected number of dimensions')
      }
    }
  }
  if(is.list(data)){
    if(mean(use %in% names(data)) != 1) stop('Objects specified in use do not exist')
    total_cases = dim(data[[use[1]]])[1]
    if(n <= total_cases){
      set.seed(seed)
      select = sample(1:total_cases,n)
      set.seed(NULL)
      for(i in use){
        if(is.array(data[[i]]) | is.data.frame(data[[i]]) | is.matrix(data[[i]])){
          if(length(dim(data[[i]])) == 2){
            data[[i]] = data[[i]][select,]
          }
          if(length(dim(data[[i]])) == 3){
            data[[i]] = data[[i]][select,,]
          }
        } else {
          stop('Objects in use cannot be subsetted')
        }
      }
    }
  }
  return(data)
}




# group

group = function(x,n,type = 'extreme'){
  minx = min(x)
  maxx = max(x)
  x = (x - minx) / (.0000001 + (maxx - minx))
  if(type == 'extreme') x = round((x * n) - .5) / (n - 1)
  if(type == 'mid') x = round((x * n) - .5) / n + (1/(2*n))
  x = x * (maxx - minx)
  x = x + minx
  return(x)
}


# colormixer

colormixer = function(col1,col2,weight,format = 'rgb'){
  
  if(ifelse(is.matrix(col1),nrow(col1),length(col1)) != length(weight) &
     length(col1) != 1){
    stop('Length of col1 must be either 1 or matching the length of weight')
  }
  if(ifelse(is.matrix(col2),nrow(col2),length(col2)) != length(weight) &
     length(col2) != 1){
    stop('Length of col1 must be either 1 or matching the length of weight')
  }
  if(length(weight) == 1){
    if(ifelse(is.matrix(col1),nrow(col1),length(col1)) !=
       ifelse(is.matrix(col2),nrow(col2),length(col2))){
      stop('If length of weight = 1, number of colors in col1 and col2 must match')
    }
  }
  
  nrows = max(c(ifelse(is.matrix(col1),nrow(col1),length(col1)),
                ifelse(is.matrix(col2),nrow(col2),length(col2)),
                length(weight)))
  
  if(is.character(col1)){
    if(length(col1) == 1){
      col1 = grDevices::col2rgb(col1)
      col1 = matrix(c(col1),ncol=3,nrow=nrows,byrow=T)
    } else {
      col1 = t(sapply(col1,grDevices::col2rgb))
    }
  } else{
    col1 = matrix(c(col1),ncol=3,nrow=nrows,byrow=F)
  }
  if(is.character(col2)){
    if(length(col2) == 1){
      col2 = grDevices::col2rgb(col2)
      col2 = matrix(c(col2),ncol=3,nrow=nrows,byrow=T)
    } else {
      col2 = t(sapply(col2,grDevices::col2rgb))
    }
  } else{
    col2 = matrix(c(col2),ncol=3,nrow=nrows,byrow=F)
  }
  
  
  col = col1 * (1-weight) + col2 * weight
  
  if(format == 'rgb') return(col)
  if(format == 'hex') return(grDevices::rgb(data.frame(col),maxColorValue = 255))
  if(!format %in% c('rgb','hex')) stop('Choose either "rgb" or "hex" as format')
  
}


# round even
round_even = function(x){
  rx = round(x)
  test = rx %% 2
  ifelse(test == 0,rx,
         ifelse( x < 0,
                 ifelse(x <  rx,floor(x),ceiling(x)),
                 ifelse(x >= rx,ceiling(x),floor(x))))
}


# Cohens d
# Computed cohens d
cohen = function(x,y){
  
  nx = length(x)
  ny = length(y)
  vx = stats::var(x)
  vy = stats::var(y)
  
  nom = mean(x) - mean(y)
  den = sqrt(((nx-1)*vx + (ny-1)*vy)/(nx+ny-2))
  coh = nom / den
  
  v_d = sqrt((nx+ny)/(nx*ny) + (coh*coh) / (2 * (nx + ny)))
  
  rx = vx/nx
  ry = vy/ny
  nu = (rx + ry)**2 / ((rx ** 2 / (rx - 1) + (ry ** 2 / (ry - 1))))
             
  return(c(coh,v_d,nu))
  }

# draws from truncated normal
trnorm <- function(n,m,sd,a,b){
  v = stats::rnorm(n,m,sd)
  repeat{
    sel =  v < a | v > b
    if(all(!sel)) break
    v[sel] = stats::rnorm(sum(sel),m,sd)
  }
  return(v)
}


# NA removal and transformation for all clustering related functions
# E.g., mt_cluster, mt_cluster_k, etc.

prepare_trajectories = function(trajectories, dimensions, weights, na_rm){
  
  # limit trajectories to dimensions
  trajectories <- trajectories[,,dimensions]
  
  # Ensure that there are no NAs
  include = rep(TRUE,ncol(trajectories))
  if(na_rm == TRUE){
    for(dim in dimensions) include = include & colSums(is.na(trajectories[,,dim])) == 0
    if(sum(include) == 0) stop('No complete case in use')
    if(mean(include) != 1) warning(paste('Removed',sum(!include),'trajectory points due to NAs'))
  } else {
    if(any(is.na(trajectories[,,dimensions]))) {
      stop("Missing values in trajectories not allowed for mt_distmat ",
           "as all trajectories must have the same number of observations.")
    }
  }
  
  # Remove NAs
  trajectories <- trajectories[,include,]
  
  # weight variables
  if(!is.null(weights)){
    if(length(weights) == length(dimensions)){
      for(i in 1:length(dimensions)){
        trajectories[,,dimensions[i]] = trans_mat(trajectories[,,dimensions[i]],scale = weights[i])
      }
    } else {
      stop('weights must match length of dimensions')
    }
  }
  
  # return trajectories
  return(trajectories)
}

Try the mousetrap package in your browser

Any scripts or data that you put into this service are public.

mousetrap documentation built on Oct. 23, 2023, 5:08 p.m.