# R/summaryFunctions.R
#
# Defines functions createSummarySpikesMat createSummarySpikesArray
#
# Documented in createSummarySpikesArray createSummarySpikesMat

#' Convert raw spike summary array into matrix or array without cells/odours missing data
#'
#' @details Some analysis (eg PCA) cannot cope with missing data. These
#' functions will drop cells or odours in order to give a dataset without
#' missing data. The choice of NALimit will affect how many cells/odours are
#' dropped. NALimit=3, the default for some time, is now too low since it will
#' result in too many odours being dropped (while keeping more cells).
#'
#' @description \code{createSummarySpikesMat} returns a 2D matrix without
#'   cells/odours missing data. The matrix has \code{ncells} rows x
#'   (\code{nodours} * \code{numSamplePoints}) columns. Timepoints for the same
#'   odour are in adjacent columns.
#'
#' @family summary_array
#' @export
#' @rdname createSummarySpikesArray
#' @param summary_array Raw 3D array of the form generated by
#'   \code{\link{create_raw_summary_array}}.
#' @param numSamplePoints The number of temporal sample points (defaults to 7)
#' @param NALimit The maximum number of missing responses that are acceptable
#' @examples
#' summary_array=create_raw_summary_array()
#' clean_summary_array=createSummarySpikesArray(summary_array, NALimit = 25)
#' clean_summary_mat=createSummarySpikesMat(summary_array, NALimit = 25)
#' str(clean_summary_array)
#' str(clean_summary_mat)
createSummarySpikesMat <- function(summary_array, numSamplePoints=7, NALimit=3) {
  # First drop the odours/cells that are missing data; the filtering rules
  # live in createSummarySpikesArray.
  complete_responses <- createSummarySpikesArray(
    summary_array,
    numSamplePoints = numSamplePoints,
    NALimit = NALimit
  )

  # Walk over the first dimension (cells). Each cell's slice is transposed
  # before apply() flattens it column-wise, so that the time points belonging
  # to one odour end up next to each other.
  responses_by_cell <- apply(
    complete_responses,
    1,
    function(cell_slice) t(cell_slice)
  )

  # apply() lays the cells out as columns; transpose to get one row per cell.
  t(responses_by_cell)
}

#' @description \code{createSummarySpikesArray} returns a 3D array without
#'   cells/odours missing data. The array has ncells rows x nodours columns x
#'   numSamplePoints slabs. All three dimensions are always retained, even
#'   when only a single cell or a single odour survives the filtering, and the
#'   input array is not required to carry dimnames.
#' @export
createSummarySpikesArray <- function(summary_array, numSamplePoints=7, NALimit=3) {
  # The subsetting below indexes three dimensions, so fail early and clearly
  # on anything that is not a 3D array.
  stopifnot(length(dim(summary_array)) == 3L)

  # Choose odours for which we are not missing too much data. The raw NA count
  # per odour is divided by numSamplePoints so that it can be compared with
  # NALimit (presumably each missing response contributes numSamplePoints NAs).
  odours.na <- apply(summary_array, 2, function(x) sum(is.na(x))) / numSamplePoints
  # Select by position rather than by name so that arrays without dimnames
  # (or with duplicated names) are handled correctly.
  odours.sel <- which(odours.na <= NALimit)

  # Identify cells with no missing data at all for the retained odours.
  # drop = FALSE keeps the array 3D when only one odour has been retained.
  cells.na <- apply(is.na(summary_array[, odours.sel, , drop = FALSE]), 1, any)
  cells.sel <- which(!cells.na)

  # Keep the complete cells and the retained odours; never collapse dimensions
  # so that callers can always rely on a cells x odours x timepoints array.
  summary_array[cells.sel, odours.sel, , drop = FALSE]
}
# sfrechter/physplit.analysis documentation built on May 29, 2019, 8:02 p.m.