# R/utils.R
#
# Defines functions: find_elbow, local_densities, rotx, roty, rotz, idx2rowcol,
# locs2ps, points_from_roi, points2img, circle_hough_transform, downsample,
# locprec2cov, ary2ps, ps2ary, crop_point_set
#
# Documented in: ary2ps, circle_hough_transform, crop_point_set, downsample,
# find_elbow, idx2rowcol, local_densities, locprec2cov, locs2ps, points2img,
# points_from_roi, ps2ary, rotx, roty, rotz

#' crop_point_set
#'
#' Retain points in the set that are within the given distance from the geometric median of the set.
#' Using the geometric median is more robust than using the centre of mass (i.e. mean).
#'
#' @param point.set a point set as a matrix with columns x,y,z.
#' @param size vector of distances from the geometric median of the points along each axis. 
#'             Points are discarded if they are outside the ellipsoid defined by size and centred on 
#'             the geometric median of the points.
#' @return point set as a matrix with columns x,y,z.
#' @export

crop_point_set <- function(point.set, size) {
    # Coordinate columns, identified by name in the point set's own column order.
    # (The previous match(colnames(point.set), c("x","y","z")) returned positions
    # in the target vector, which is only correct when x,y,z happen to be the
    # first columns of point.set.)
    xyz.idx <- which(colnames(point.set) %in% c("x", "y", "z"))
    # The geometric median is more robust to outliers than the centroid
    center <- pracma::geo_median(point.set[, xyz.idx, drop = FALSE])$p
    # geo_median may return extra named elements; keep only the coordinates.
    # NOTE(review): assumes $p carries the column names — confirm with pracma.
    center <- center[names(center) %in% c('x','y','z')]
    if(length(size) == 1) { size <- rep(size, length(xyz.idx)) }
    if(length(xyz.idx) > length(size)) {
        stop("ERROR: point set and crop size dimensionalities don't match.")
    }
    # Truncate size to the data dimensionality so the per-row sum below cannot
    # recycle over extra size entries
    size <- size[seq_along(xyz.idx)]
    # Centre the points on the geometric median
    point.set[, xyz.idx] <- sweep(point.set[, xyz.idx, drop = FALSE], 2, center)
    # Remove points outside the ellipsoid defined by size
    scaled.sq <- sweep(point.set[, xyz.idx, drop = FALSE]^2, 2, size^2, "/")
    idx.to.remove <- which(rowSums(scaled.sq) > 1)
    if(length(idx.to.remove) > 0) {
        point.set <- point.set[-idx.to.remove, , drop = FALSE]
    }
    return(point.set)
}

#' ps2ary
#'
#' Convert a list of 3d point sets to a 4d array.
#' Also works for 2d point sets to 3d array conversion.
#'
#' @param point.sets a list of point sets.
#' @param dims vector of dimensions of the axes (x,y in 2d, x,y,z in 3d).
#' @return a 3d or 4d array.
#' @export

ps2ary <- function(point.sets, dims) {
    if(!(length(dims) %in% c(2,3))) {
        stop("ERROR: Dimensions can only be 2d or 3d.")
    }
    if(typeof(point.sets) != "list") {stop("ERROR: Argument point.sets must be a list.")}
    n <- length(point.sets)
    ts <- array(0, dim = c(dims, n))
    # Translate so that coordinates start at 1 along each axis.
    # (Subtracting the minimum alone left the smallest coordinate at 0; an
    # index of 0 in an array assignment is silently dropped, so points at the
    # minimum were lost.)
    for(i in seq_len(n)) {
        mins <- apply(point.sets[[i]], 2, min)
        point.sets[[i]] <- sweep(point.sets[[i]], 2, mins - 1)
    }
    for(i in seq_len(n)) {
        for(j in seq_len(nrow(point.sets[[i]]))) {
            # Round in case we have interpolated coordinates (e.g. from registration)
            a <- round(point.sets[[i]][j,])
            if(length(dims) == 3) {
                ts[a[1], a[2], a[3], i] <- ts[a[1], a[2], a[3], i] + 1
            } else {
                ts[a[1], a[2], i] <- ts[a[1], a[2], i] + 1
            }
        }
    }
    return(ts)
}

#' ary2ps
#'
#' Convert a 4d array to a list of 3d point sets. 
#' The points are formed by extracting the coordinates of array values strictly above the given cut-off (default 0).
#'
#' @param ary a 4d array with last dimension indexing instances.
#' @param bkg Extract points for array values strictly above this (default = 0)
#' @return a list of point sets.
#' @export

ary2ps <- function(ary, bkg = 0) {
  # One point set per instance along the 4th dimension: the points are the
  # array coordinates whose value is strictly above bkg.
  n.sets <- dim(ary)[4]
  lapply(seq_len(n.sets), function(idx) {
    pts <- as.matrix(which(ary[, , , idx] > bkg, arr.ind = TRUE))
    colnames(pts) <- c("x", "y", "z")
    pts
  })
}

#' locprec2cov
#'
#' Converts localization precision columns to a list of arrays of covariance matrices
#'
#' @param point.sets a list of n point sets with locprec columns (locprecz column required for 3D data)
#' @param scale logical, whether to scale the localization precision by the variance of the coordinates
#' @return a list of 2x2xn or 3x3xn arrays.
#' @export

locprec2cov <- function(point.sets, scale = FALSE) {
  # Dimensionality is decided from the first point set: 3D data must carry a
  # locprecz column, otherwise the data are treated as 2D.
  has.z <- "locprecz" %in% colnames(point.sets[[1]])
  d <- if (has.z) 3 else 2
  # locprec applies to both x and y; locprecz (if present) to z
  prec.cols <- if (has.z) c("locprec", "locprec", "locprecz") else c("locprec", "locprec")
  coord.cols <- if (has.z) c("x", "y", "z") else c("x", "y")
  lapply(point.sets, function(ps) {
    np <- nrow(ps)
    covs <- array(0, dim = c(d, d, np))
    if (scale) {
      v <- apply(ps, 2, stats::var)
    }
    # seq_len handles empty point sets; the previous 1:np loop iterated
    # j = 0 when np == 0 and failed with a subscript error.
    for (j in seq_len(np)) {
      cm <- diag(ps[j, prec.cols], d)
      if (scale) {
        # Scale each axis variance-wise; only the diagonal is non-zero
        cm <- cm / v[coord.cols]
      }
      covs[, , j] <- cm
    }
    covs
  })
}

#' downsample
#'
#' Weighted downsampling of a point set.
#' If point weights are not provided, they are computed to be proportional to the local density
#' around each point.
#'
#' @param point.set a point set
#' @param n integer, sample size. 
#' @param k integer, number of nearest neighbours to consider to estimate local density
#' @param weights a vector of probability weights
#' @return a point set
#' @export

downsample <- function(point.set, n = NULL, k = NULL, weights = NULL) {
  # Validate the sample size first: it is required whichever weighting is used
  if(is.null(n)) {
    stop("Sample size must be provided")
  }
  if(is.null(k) && is.null(weights)) {
    stop("One of k or weights should be provided")
  }
  if(is.null(weights)) {
    weights <- local_densities(point.set, k)
    w.min <- min(weights)
    w.max <- max(weights)
    if(w.max > w.min) {
      # Rescale to [0,1]
      weights <- (weights - w.min)/(w.max - w.min)
    } else {
      # All densities identical: rescaling would divide by zero and produce
      # NaN weights, which makes sample() fail. Fall back to uniform weights.
      weights <- rep(1, length(weights))
    }
  }
  replace <- FALSE
  if(nrow(point.set) <= n) {
    message("Sample size is greater than the number of points, drawing with replacement to reach sample size.")
    replace <- TRUE
  }
  sample.idx <- sample(nrow(point.set), n, prob = weights, replace = replace)
  # drop = FALSE keeps a single-row sample as a matrix/data frame
  return(point.set[sample.idx, , drop = FALSE])
}

#' Circle Hough transform
#'
#' Extract coordinates of the centres of circles from a 2D image using the Hough transform
#'
#' @importFrom foreach %dopar%
#' @param pixels input data, either a matrix representing a 2D image or a data frame of signal coordinates with columns x, y.
#'               For images, background is expected to be 0 and signal to have positive values.
#' @param rmin minimum search radius.
#' @param rmax maximum search radius.
#' @param resolution number of steps in the circle transform (default: 360). This represents the maximum number of votes a point can get.
#' @param threshold score threshold between 0 and 1.
#' @param ncpu number of threads to use to speed up computation (default: 1)
#' @return a data frame with columns x, y, r and score
#' @examples
#' point.set <- data.frame(x = c(-9.8,-5.2,12.5,2.5,4.5,1.3,-0.2,0.4,9.3,-1.4,0.5,-1.1,-7.7),
#'                         y = c(-4.2,1.5,-0.5,12,-3,-7.2,10.9,6.7,-1.3,10,6.7,-6.2,2.9))
#' circles <- circle_hough_transform(pixels = point.set, rmin = 3, rmax = 6, resolution = 100,
#'                                   threshold = 0.1, ncpu = 1)
#' @export

circle_hough_transform <- function(pixels, rmin, rmax, threshold, resolution = 360, ncpu = 1) {
  
  # A circle's score is its vote count divided by `resolution` (the maximum
  # possible number of votes), so a valid threshold lies in (0, 1].
  if (threshold<=0 || threshold>1) {
    stop("ERROR: Threshold must be between 0 and 1")
  }
  
  circles.found <- NULL
  d <- vector("numeric", 2) # dimensions of the image or region to consider
  
  # A matrix input is converted to a data frame of non-zero pixel coordinates
  # so both input forms share the same code path below
  if(all(class(pixels) != "data.frame" & length(dim(pixels))==2)) { 
    ## Extract table of coordinates of non-zero pixels
    coords <- which(pixels!=0, arr.ind = TRUE)
    if(length(coords)==0) {
      stop("No pixel with intensity above 0 was found in the image.")
    }
    pixels <- as.data.frame(cbind(coords, pixels[coords]))
    colnames(pixels) <- c("x","y","value")
  } 
  if (all(c("x","y") %in% colnames(pixels))) {
    # Extent of the region spanned by the points along each axis; this bounds
    # the accumulator size
    range.x <- range(pixels$x)
    range.y <- range(pixels$y)
    d[1] <- ceiling(range.x[2]-range.x[1])
    d[2] <- ceiling(range.y[2]-range.y[1])
  } else {
    stop("ERROR: unrecognized input data")
  }
  
  # Candidate radii are searched with a step of 1 coordinate unit
  radii <- seq(from = rmin, to = rmax, by = 1)
  
  cluster <- parallel::makeCluster(ncpu)
  doParallel::registerDoParallel(cluster)
  on.exit(parallel::stopCluster(cluster))
  
  # Accumulator A[x, y, k] counts votes for a circle of radius radii[k]
  # centred at (x, y). An ff disk-backed array is used to limit memory use.
  # NOTE(review): workers share the ff accumulator; each radius k writes only
  # to its own slice A[, , k], which keeps the parallel writes disjoint.
  A <- ff::ff(0, dim = c(d[1], d[2], length(radii))) # array(0, c(d[1], d[2], length(radii)))
  tmp <- foreach::foreach(k = 1:length(radii), .combine = 'c', .packages = c('ff')) %dopar% {
    # for(k in 1:length(radii)) {
    r <- radii[k]
    for(i in 1:nrow(pixels)) {
      x <- pixels$x[i]
      y <- pixels$y[i]
      # Each signal point votes for all candidate centres lying at distance r
      # from it, sampled at `resolution` angles around the circle
      for(theta in seq(from = 0, to = 2*pi, length = resolution)) {
        a <- round(x + r * cos(theta))
        b <- round(y + r * sin(theta))
        if(a > 0 && a <= d[1] && b > 0 && b <= d[2]) {
          A[a, b, k] <- A[a, b, k] + 1
        }
      }
    }
    NULL
  }
  
  ## Find local maxima in accumulator
  ## Iteratively search for the global maximum and
  ## remove the corresponding circle
  ## Repeat until we hit the threshold
  done <- FALSE
  while(!done) {
    value.max <- max(A[])
    score <- value.max/resolution
    if (score <= threshold) {
      done <- TRUE
    } else {
      coords <- which(A[] == value.max, arr.ind = TRUE)
      ## Note: There can be more than one point with max value
      ## Remove the corresponding circles from the accumulator
      for (k in 1:nrow(coords)) {
        r.idx <- coords[k, 3]
        r <- radii[r.idx]
        # Replace the radius index by the actual radius for reporting
        coords[k, 3] <- r
        x <- coords[k, 1]
        y <- coords[k, 2]
        # Zero a box of half-width r around the centre (clipped to the image)
        # across ALL radii, so the same circle is not detected again
        x0 <- max(c(1, x-r))
        y0 <- max(c(1, y-r))
        x1 <- min(c(d[1], x+r))
        y1 <- min(c(d[2], y+r))
        A[x0:x1, y0:y1,] <- 0
      }
      if(nrow(coords)>1) {
        ## Check if the centres are close to each other
        ## and merge those that are too close
        # Pairwise distances between centres only (drop the radius column)
        dst <- as.matrix(stats::dist(coords[,-3]))
        diag(dst) <- NA
        # Centres closer than min(radii)/4 are merged into their mean
        rws <- unique(as.vector(which(dst<min(radii)/4, arr.ind = TRUE)))
        if(length(rws)>1) {
          new.centre <- round(apply(coords[rws,], 2, mean))
          coords <- coords[-rws,]
          coords <- rbind(coords, new.centre)
        }
      }
      circles <- cbind(coords, score)
      circles.found <- rbind(circles.found, circles)
    }
  }
  if (length(circles.found)>0) {
    colnames(circles.found) <- c("x", "y", "r", "score")
  }
  return(as.data.frame(circles.found))
}

#' points2img
#'
#' Convert a data frame of point coordinates into an image.
#' Expected photon count at each voxel is computed as in:
#' F. Huang, S. L. Schwartz, J. M. Byars, and K. A. Lidke, “Simultaneous multiple-emitter fitting for single
#' molecule super-resolution imaging,” Biomed. Opt. Express 2(5), 1377–1393 (2011).
#'
#' @importFrom foreach %dopar%
#' @param points a point set as a data frame of coordinates with columns x,y,z.
#' @param channels vector of channels to consider, must be values present in the input data frame channel column
#' @param voxel.size a numeric vector of length 3 indicating the size of the voxel along x,y and z in the same unit as the coordinates (e.g. nm)
#' @param method how to calculate voxel values. Available methods are:
#'   \itemize{
#'     \item 'histogram': value is the number of points (i.e. emitters) in the voxel
#'     \item 'photon': value is the expected number of photons from the points in the voxel. Input data frame must have columns locprec, locprecz and phot[on].
#'}
#' @param ncpu number of threads to use to speed up computation (default: 1)
#' @return an array of dimensions x,y,z and channels if applicable
#' @examples 
#' point.set <- data.frame(x = c(-9.8,-5.2,12.5,2.5,4.5,1.3,-0.2,0.4,9.3,-1.4,0.5,-1.1,-7.7),
#'                         y = c(-4.2,1.5,-0.5,12,-3,-7.2,10.9,6.7,-1.3,10,6.7,-6.2,2.9),
#'                         z = c(3.4,-3.8,-1.4,1.8,3.5,2.5,2.6,-4.8,-3.8,3.9,4.1,-3.6,-4))
#' img <- points2img(point.set, voxel.size = c(2,2,2), method = 'histogram')
#' @export

points2img <- function(points, voxel.size, method, channels = NULL, ncpu = 1) {
  
  if (!all(c("x","y","z") %in% colnames(points))) {
    stop("ERROR: Columns x,y,z must be present in input data frame.")
  }
  
  if(!is.null(channels)) {
    if(!("channel" %in% colnames(points))) {
      stop("ERROR: channel column must be present in input data frame.")
    } else if(!all(channels %in% unique(points$channel))) {
      stop("ERROR: Unknown channel requested.")
    }
  }  
  # Image size in voxels along each axis, from the data extent and voxel size
  range.x <- range(points$x)
  range.y <- range(points$y)
  range.z <- range(points$z)
  image.size <- ceiling(c((range.x[2]-range.x[1])/voxel.size[1], 
                          (range.y[2]-range.y[1])/voxel.size[2], 
                          (range.z[2]-range.z[1])/voxel.size[3]))
  if(any(image.size>.Machine$integer.max)) {
    stop("ERROR: Target image size too big.")
  }
  # Define voxels
  x.bins <- cut(points$x, breaks = image.size[1])
  y.bins <- cut(points$y, breaks = image.size[2])
  z.bins <- cut(points$z, breaks = image.size[3])
  # Assign points to voxels
  points$x.bin <- as.numeric(x.bins)
  points$y.bin <- as.numeric(y.bins)
  points$z.bin <- as.numeric(z.bins)
  # Extract voxels boundaries by parsing the "(lower,upper]" interval labels
  # produced by cut()
  x.bins <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", levels(x.bins)) ),
                  upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", levels(x.bins))))
  y.bins <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", levels(y.bins)) ),
                  upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", levels(y.bins))))
  z.bins <- cbind(lower = as.numeric( sub("\\((.+),.*", "\\1", levels(z.bins)) ),
                  upper = as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", levels(z.bins))))
  x.bins.breaks <- sort(unique(c(x.bins[,"lower"], x.bins[,"upper"])))
  y.bins.breaks <- sort(unique(c(y.bins[,"lower"], y.bins[,"upper"])))
  z.bins.breaks <- sort(unique(c(z.bins[,"lower"], z.bins[,"upper"])))
  
  # Voxel position is the centre of the corresponding bin
  x.bins.centres <- apply(x.bins, 1, mean)
  y.bins.centres <- apply(y.bins, 1, mean)
  z.bins.centres <- apply(z.bins, 1, mean)
   
  # Disk-backed output image (4D when multiple channels are requested)
  if(length(channels)>1) {
    I <- ff::ff(0, dim = c(image.size, length(channels)))
  } else {
    I <- ff::ff(0, dim = image.size) 
  }
  
  if(method == 'photon') {
    if (!("phot" %in% colnames(points) || "photon" %in% colnames(points))) {
      stop("ERROR: Column 'phot[on]' must be present in input data frame to use method 'photon'.")
    }
    if (!("locprec" %in% colnames(points))) {
      stop("ERROR: Column 'locprec' must be present in input data frame to use method 'photon'.")
    }
    if (!("locprecz" %in% colnames(points))) {
      warning("WARNING: Column 'locprecz' not present in input data frame. Using values from locprec column for z.")
      points$locprecz <- points$locprec
    }
    cluster <- parallel::makeCluster(ncpu)
    doParallel::registerDoParallel(cluster)
    on.exit(parallel::stopCluster(cluster))
    # Calculate photon contribution of each point to each voxel
    # A point contributes to voxels within 3*locprec of its position
    # NOTE(review): workers write concurrently to the shared ff image; two
    # points whose 3*locprec neighbourhoods overlap may race on a voxel.
    i <- NULL
    tmp <- foreach::foreach(i = 1:nrow(points), .combine = 'c', .packages = c('pracma','ff')) %dopar% {
      # for(i in 1:nrow(points)) {
      p <- points[i,]
      # Find corners of the bounding box (i.e. +/- 3*locprec along each axis)
      range.vx <- findInterval(c(p$x - 3*p$locprec, p$x + 3*p$locprec), x.bins.breaks, all.inside = TRUE)
      range.vy <- findInterval(c(p$y - 3*p$locprec, p$y + 3*p$locprec), y.bins.breaks, all.inside = TRUE)
      range.vz <- findInterval(c(p$z - 3*p$locprecz, p$z + 3*p$locprecz), z.bins.breaks, all.inside = TRUE)
      # Make sure we don't get out of the image
      range.vx[2] <- min(range.vx[2], image.size[1])
      range.vy[2] <- min(range.vy[2], image.size[2])
      range.vz[2] <- min(range.vz[2], image.size[3])
      # Get all voxels in the bounding box
      voxels <- expand.grid(x = range.vx[1]:range.vx[2], y = range.vy[1]:range.vy[2], z = range.vz[1]:range.vz[2])
      for(j in 1:nrow(voxels)) {
        v <- voxels[j,]
        # Get voxel position in the original coordinate system
        vx <- x.bins.centres[v$x]
        vy <- y.bins.centres[v$y]
        vz <- z.bins.centres[v$z]
        # Test if voxel is within ellipsoid (3 sigma along each axis)
        test <- (p$x-vx)^2/(9*p$locprec^2) + (p$y-vy)^2/(9*p$locprec^2) + (p$z-vz)^2/(9*p$locprecz^2)
        if(test>1) { next }
        # Gaussian sigma from localization precision (Huang et al. 2011);
        # the erf differences integrate the Gaussian over the voxel extent
        sigma <- p$locprec * sqrt(2)
        sigma.z <- p$locprecz * sqrt(2)
        if(length(channels) > 1) {
          c <- match(p$channel, channels)
          if(!is.na(c)) {
            I[v$x,v$y,v$z,c] <- I[v$x,v$y,v$z,c] + p$phot/4 * (pracma::erf((vx-p$x+0.5)/(sigma)) - pracma::erf((vx-p$x-0.5)/(sigma))) * (pracma::erf((vy-p$y+0.5)/(sigma)) - pracma::erf((vy-p$y-0.5)/(sigma))) * (pracma::erf((vz-p$z+0.5)/(sigma.z)) - pracma::erf((vz-p$z-0.5)/(sigma.z)))
          }
        } else {
          I[v$x,v$y,v$z] <- I[v$x,v$y,v$z] + p$phot/4 * (pracma::erf((vx-p$x+0.5)/(sigma)) - pracma::erf((vx-p$x-0.5)/(sigma))) * (pracma::erf((vy-p$y+0.5)/(sigma)) - pracma::erf((vy-p$y-0.5)/(sigma))) * (pracma::erf((vz-p$z+0.5)/(sigma.z)) - pracma::erf((vz-p$z-0.5)/(sigma.z)))
        }
        NULL # Return NULL to save memory
      }
    }
  } else if(method == 'histogram') {
    if(length(channels) > 1) {
      for(c in channels) {
        pts <- points[which(points$channel == c), c('x','y','z')]
        bins <- aws::binning(x = pts, y = NULL, nbins = image.size[1:3])
        I[,,,c] <- bins$table.freq
      }
    } else {
      # NOTE(review): misplaced parenthesis — length(channels == 1) equals
      # length(channels), not the intended (length(channels) == 1). It works
      # by accident: 0 (NULL channels) is falsy, 1 (single channel) is truthy.
      if(length(channels == 1)) {
        pts <- points[which(points$channel == channels), c('x','y','z')]
        bins <- aws::binning(x = pts, y = NULL, nbins = image.size)
      } else {
        bins <- aws::binning(x = points[,c('x','y','z')], y = NULL, nbins = image.size)
      }
      # Replace the ff object with a plain in-memory array of the counts
      I <- array(bins$table.freq, dim = dim(bins$table.freq))
    }
  } else {
    stop(paste0("ERROR: Unknown method: ", method))
  }
  # I[] materializes the (possibly ff-backed) image as a regular array
  return(I[])
}

#' points_from_roi
#'
#' Extract points within given bounding box.
#' Points are translated so that (0,0,0) correspond to the bounding box corner defined by
#' roi['min',c('x','y','z')]
#'
#' @param points a point set as a data frame of coordinates with columns x,y,z.
#' @param roi a data frame with columns x,y,z and rows min and max defining a bounding box
#' @return a data frame with same columns as input
#' @export

points_from_roi <- function(points, roi) {
  # Shift coordinates so the ROI's minimum corner becomes the origin
  shifted <- points
  shifted$x <- points$x - roi['min','x']
  shifted$y <- points$y - roi['min','y']
  shifted$z <- points$z - roi['min','z']
  # ROI extent along each axis
  span.x <- roi['max','x'] - roi['min','x']
  span.y <- roi['max','y'] - roi['min','y']
  span.z <- roi['max','z'] - roi['min','z']
  # Keep points strictly inside the box (boundaries excluded)
  inside <- which(shifted$x > 0 & shifted$x < span.x
                  & shifted$y > 0 & shifted$y < span.y
                  & shifted$z > 0 & shifted$z < span.z)
  if(length(inside) == 0) {
      stop("ERROR: ROI is empty.")
  }
  return(shifted[inside, , drop = FALSE])
}

#' locs2ps
#'
#' Cluster localizations into point sets using DBSCAN
#'
#' @param points a point set as a data frame of coordinates with columns x,y,z.
#' @param eps DBSCAN parameter, size of the epsilon neighbourhood
#' @param minPts DBSCAN parameter, number of minimum points in the eps region
#' @param keep.locprec logical (default: TRUE), whether to preserve the localization precision columns
#' @param keep.channel logical (default: TRUE), whether to preserve channel information column
#' @param cluster.2d logical (default: FALSE), whether to cluster only using x,y (and ignore z)
#' @return a list of matrices with columns x,y,z and eventually locprec[z] and names set to the cluster indices.
#' @export

locs2ps <- function(points, eps, minPts, keep.locprec = TRUE, keep.channel = TRUE, cluster.2d = FALSE) {
  
  # Data are 2D when there is no z column
  is.2d <- !("z" %in% colnames(points))
  # Coordinate columns are always kept; optional columns are selected by
  # name pattern according to the keep.* flags (locprec also matches locprecz)
  coord.cols <- if(is.2d) c("x", "y") else c("x", "y", "z")
  patterns <- c("locprec", "channel")[c(keep.locprec, keep.channel)]
  extra.cols <- character(0)
  if(length(patterns) > 0) {
    extra.cols <- colnames(points)[grep(paste(patterns, collapse = "|"), colnames(points))]
  }
  points <- points[, c(coord.cols, extra.cols)]
  # Cluster in 2D when requested or when there is no z coordinate
  cluster.cols <- if(cluster.2d || is.2d) c("x", "y") else c("x", "y", "z")
  dbr <- dbscan::dbscan(points[, cluster.cols], eps = eps, minPts = minPts, borderPoints = FALSE)
  points$site <- dbr$cluster
  # Cluster id 0 is DBSCAN noise; drop those localizations
  noise <- which(points$site == 0)
  if(length(noise) > 0) {
    points <- points[-noise, , drop = FALSE]
  }
  # One matrix per cluster, named by cluster index
  PS <- split(points, points$site)
  PS <- lapply(PS, function(m) { m["site"] <- NULL; as.matrix(m) })
  return(PS)
}


#' idx2rowcol
#'
#' Convert indices into a dist object to row, column coordinates of the corresponding distance matrix
#'
#' @param idx vector of indices
#' @param n size of the n x n distance matrix
#' @return a matrix with two columns nr and nc
#' @export

idx2rowcol <- function(idx, n) {
  # Invert the packing used by stats::dist, which stores the lower triangle
  # of the n x n distance matrix column by column as a flat vector.
  row.idx <- ceiling(n - (1 + sqrt(1 + 4 * (n^2 - n - 2 * idx))) / 2)
  col.idx <- n - (2 * n - row.idx + 1) * row.idx / 2 + idx + row.idx
  return(cbind(nr = row.idx, nc = col.idx))
}

#' rotz
#'
#' Create a rotation matrix representing a rotation of theta radians about the z-axis
#'
#' @param theta angle in radians
#' @return a 3x3 rotation matrix
#' @export

rotz <- function(theta) {
  # Standard right-handed rotation about the z-axis, built column by column
  ct <- cos(theta)
  st <- sin(theta)
  matrix(c(ct, st, 0,
           -st, ct, 0,
           0, 0, 1), nrow = 3, ncol = 3)
}

#' roty
#'
#' Create a rotation matrix representing a rotation of theta radians about the y-axis
#'
#' @param theta angle in radians
#' @return a 3x3 rotation matrix
#' @export

roty <- function(theta) {
  # Standard right-handed rotation about the y-axis, built column by column
  ct <- cos(theta)
  st <- sin(theta)
  matrix(c(ct, 0, -st,
           0, 1, 0,
           st, 0, ct), nrow = 3, ncol = 3)
}

#' rotx
#'
#' Create a rotation matrix representing a rotation of theta radians about the x-axis
#'
#' @param theta angle in radians
#' @return a 3x3 rotation matrix
#' @export

rotx <- function(theta) {
  # Standard right-handed rotation about the x-axis, built column by column
  ct <- cos(theta)
  st <- sin(theta)
  matrix(c(1, 0, 0,
           0, ct, st,
           0, -st, ct), nrow = 3, ncol = 3)
}

#' local_densities
#' 
#' Compute local point density at each point of a point set
#' 
#' Local density is computed as in Ning X, Li F, Tian G, Wang Y (2018) 
#' An efficient outlier removal method for scattered point cloud data. 
#' PLOS ONE 13(8):e0201280. https://doi.org/10.1371/journal.pone.0201280
#'
#' @param X point set, a N x D matrix
#' @param k (optional) number of nearest neighbors used (defaults to all points).
#' @return vector of density value for each point
#' @export
#' 

local_densities <- function(X, k = NULL) {
  
  # Default to using all other points as neighbours
  n.pts <- nrow(X)
  if(is.null(k)) {
    k <- n.pts - 1
  }

  # n x k matrix of distances to each point's k nearest neighbours
  nn.dist <- dbscan::kNNdist(X, k, all = TRUE)
  # Mean neighbour distance per point (the per-point scale in Ning et al.)
  d <- apply(nn.dist, 1, mean)
  # Density = average of exp(-distance / scale) over the k neighbours;
  # dividing the matrix by d recycles column-major, i.e. row i is scaled by d[i]
  densities <- rowSums(exp(-nn.dist / d)) / k
  
  return(densities)
}

#' find_elbow
#' 
#' Find elbow in a 2D curve represented by a list of ordered values
#' 
#' This function finds the point with maximum distance from the line between
#' the first and last points.
#' Adapted from StackOverflow:
#' http://stackoverflow.com/questions/2018178/finding-the-best-trade-off-point-on-a-curve
#' 
#' @param values vector of values in decreasing order
#' @return index and value of the selected point
#' @export

find_elbow <- function(values) {
  
  n.pts <- length(values)
  
  # The curve must be monotone (expected decreasing)
  if (is.unsorted(values) && is.unsorted(rev(values))) {
    stop("Values must be in decreasing order") 
  }
  
  # Points of the curve as (index, value) pairs
  curve <- cbind(seq.int(n.pts), as.matrix(values))
  # Unit vector along the chord from the first to the last point
  chord <- curve[n.pts, ] - curve[1, ]
  chord <- chord / sqrt(sum(chord^2))
  # Offsets of every point relative to the first point
  offsets <- sweep(curve, 2, curve[1, ])
  # Decompose each offset into a component along the chord and a
  # perpendicular component; the elbow is the point whose perpendicular
  # distance to the chord is largest.
  along <- drop(offsets %*% chord)
  perp <- offsets - outer(along, chord)
  dists <- sqrt(rowSums(perp^2))
  
  return(curve[which.max(dists), ])
}

# Try the LOMAR package in your browser.
#
# Any scripts or data that you put into this service are public.
#
# LOMAR documentation built on March 18, 2022, 6:05 p.m.