R/sampling_curves.R

Defines functions make_rand_sampling_curve samplingcurve get_partners plot.samplingcurve lines.samplingcurve hist.samplingcurve plot_sampling_ecdf subsample plot_sampling_envelope plot_prop_identified urmvhyper negexp

Documented in hist.samplingcurve lines.samplingcurve make_rand_sampling_curve plot_prop_identified plot.samplingcurve samplingcurve subsample urmvhyper

#' Make (randomised) sampling curve object for neuron or connection table
#'
#' @details \code{make_rand_sampling_curve} is a thin wrapper for
#'   \code{\link{samplingcurve}}, which has the additional functionality of
#'   fetching (all) the synapses for one or more CATMAID neurons (specified
#'   using any form compatible with \code{\link{catmaid_skids}}).
#'
#' @param x A \code{\link{catmaid_skids}} compatible neuron specification or a
#'   data.frame generated by \code{\link{catmaid_get_connector_table}}.
#' @param partners Whether to examine the input or output partners for given
#'   neuron.
#' @param sample Whether to randomise the connection order (this is the right
#'   thing to do if the input neuron has been completed but was not randomly
#'   sampled in the first place.)
#' @param volume An optional neuropil volume to which connectors can be
#'   restricted after being fetched from CATMAID. Can be either a character
#'   vector naming the volume or a surface object that is (or can be converted
#'   \code{\link{as.mesh3d}}) class \code{\link[rgl]{mesh3d}} e.g.
#'   \code{\link{hxsurf}}.
#' @param keep.input Whether or not to keep the raw input as an attribute on the
#'   \code{samplingcurve} object.
#'
#' @return An object of class \code{\link{samplingcurve}}
#' @export
#'
#' @examples
#'
#'
#' \donttest{
#' sc=make_rand_sampling_curve(2560371, partners='out')
#' head(sc)
#' plot(sc, main='Randomised output sampling curve for rLPTCIN')
#' lines(sc, rand=20)
#'
#' scin=make_rand_sampling_curve(2560371, partners='in')
#' plot(sc, main='Randomised input sampling curve for rLPTCIN')
#' lines(sc, rand=20)
#' }
make_rand_sampling_curve <- function(x, partners=c("auto", "out", 'in'),
                                     sample=TRUE, volume=NULL, keep.input=TRUE) {
  partners=match.arg(partners)
  if(!is.data.frame(x)) {
    if(partners=='auto'){
      warning('partners="auto". Assuming that you want the outputs of this neuron')
      partners="out"
    }
    x=get_partners(x, partners = partners, volume = volume)
  }

  nunique=function(x) length(unique(x))
  if("partner_skid" %in% colnames(x)) {
    partner_skid <- x$partner_skid
  } else {
    if(partners=="auto") {
      partners <- ifelse(nunique(x$post_skid) > nunique(x$pre_skid), "out", "in")
    }
    partner_skid <- if(partners=='out') x$post_skid else x$pre_skid
  }
  if(sample)
    partner_skid <- sample(partner_skid)
  res <- samplingcurve(partner_skid)
  if(keep.input){
    attr(res, 'partners')=x
  }
  res
}


#' Make a basic sampling curve from a vector of partner ids
#'
#' @param partners A vector or partner neuron identifiers (typically numeric
#'   such as CATMAID skeleton ids)
#' @param N,m optional parameters describing the total number of connections and
#'   the total number of partners (if known).
#'
#' @return An object of class \code{samplingcurve}, currently implemented as a
#'   data.frame.
#' @export
#'
#' @examples
#' scuniform=samplingcurve(sample(1:20, size=200, replace=TRUE))
#' plot(scuniform)
#' # add 20 random realisations
#' lines(scuniform, rand=20)
#' # add a smooth mean
#' lines(scuniform, rand=1000, mean=TRUE, col='black')
#'
#' # use real sample data for inputs to a lateral horn neuron
#' plot(pd2a1.1.sc)
#' lines(pd2a1.1.sc, rand=20)
samplingcurve <- function(partners, N=NULL, m=NULL) {
  new=!duplicated(partners)
  csnew=cumsum(new)
  res=data.frame(new=csnew, partner=partners)
  class(res)=c('samplingcurve', class(res))
  attr(res, 'N')=N
  attr(res, 'm')=m
  res
}


#' @importFrom catmaid catmaid_get_connector_table catmaid_get_node_count nsoma
#' @importFrom nat pointsinside
get_partners <- function(x, partners=c("out", "in"), volume=NULL) {
  partners=match.arg(partners)
  df <- catmaid_get_connector_table(x, direction = partners)
  unidentified_partners=is.na(df$partner_skid)
  if(any(unidentified_partners)) {
    warning("Dropping ", sum(unidentified_partners),' for neuron: ', x)
    dropped=df[unidentified_partners,,drop=FALSE]
    df=df[!unidentified_partners,,drop=FALSE]
    attr(df, 'unidentified_partners')=dropped
  }
  df$nodes=catmaid_get_node_count(df$partner_skid)
  df$nsoma=nsoma(df$partner_skid)
  if(!is.null(volume)) {
    if(is.character(volume)){
      if(!requireNamespace('elmr'))
        stop("Suggested package elmr is required for named volumes!",
             " Install using:\n",
             "  devtools::install_github('jefferis/elmr')")
      volume=subset(elmr::FAFBNP.surf, subset=volume)
    }
    pp=pointsinside(df, volume)
    df=df[pp,,drop=FALSE]
  }
  df
}

#' @description \code{plot.samplingcurve} plots a standard sampling curve
#'
#' @param ... Additional arguments to plotting functions
#'
#' @export
#' @rdname samplingcurve
#' @importFrom graphics plot abline
plot.samplingcurve <- function(x, col='red', ...) {
  plot(x$new, type='l', xlab='Connections Tested', ylab='New Neurons', col=col, ...)
  abline(a=0, b=1, lty=2)
}


#' @description \code{lines.samplingcurve} adds a line for a sampling curve to
#'   an existing plot. It can also add a specified number of re-randomised
#'   versions of the curve, optionally producing a smoothed mean.
#' @param rand number of randomised versions of curve to plot
#' @param mean whether to plot the mean of specified number of random curves
#'   rather than each individual curve.
#' @param lty line type (see \code{\link{par}})
#' @param col line colour (see \code{\link{lines}})
#' @param colpal A colour palette. Either a function (see
#'   \code{\link[grDevices]{rainbow}}) or a vector of colour names. Only used
#'   when \code{col=NULL})
#' @importFrom graphics lines
#' @export
#' @rdname samplingcurve
lines.samplingcurve <- function(x, rand=0, mean=FALSE, lty=3, col=NULL, ..., colpal='grey'){
  if(is.null(col)) {
    if(is.function(colpal)) colpal <- colpal(rand)
    col=colpal
  }

  if(rand<1){
    lines(x$new, lty=lty, col=col, ...)
    return(invisible(NULL))
  }

  rs=replicate(rand, subsample(x)$new)
  if(mean){
    rsm=rowMeans(rs)
    lines(rsm, col=col, ...)
    return(invisible(NULL))
  }

  if(length(col)==1) col=rep(col, rand)
  for(i in seq_len(rand)) {
    lines(rs[,i], col=col[i], lty=lty, ...)
  }
}


#' @description \code{hist.samplingcurve} plots a histogram of connections per
#'   partner neuron. strictly speaking this is a bar plot for a table object
#'   rather than an R histogram
#' @param x A \code{\link{samplingcurve}} object
#' @param decreasing Whether to plot the strongest connections closest to the y
#'   axis (default \code{TRUE})
#' @param plot Whether to show the histogram
#'
#' @return \code{hist.samplingcurve} returns the \code{table} of connections per
#'   partner used for the plot.
#' @export
#'
#' @rdname samplingcurve
#' @examples
#' scuniform=samplingcurve(sample(1:20, size=200, replace=TRUE))
#' hist(scuniform, main='20 neurons with equal connection probability')
#'
#' hist(pd2a1.1.sc, main='Inputs to a lateral horn neuron')
hist.samplingcurve <- function(x, decreasing = TRUE, plot=TRUE, ...) {
  tt=table(x$partner)
  stt=sort(tt, decreasing=decreasing)
  names(stt) <- seq_along(stt)
  if(plot)
    plot(stt, xlab='Partner', ylab='connections', ...)
  invisible(stt)
}

#' @importFrom stats ecdf
plot_sampling_ecdf <- function(x, decreasing = TRUE, ...) {
  if(!inherits(x, 'samplingcurve'))
    x=make_rand_sampling_curve(x, sample = FALSE)
  tt=table(x$partner)
  stt=sort(tt, decreasing=decreasing)
  x=rep(seq_along(stt), stt)
  plot(ecdf(x), main="Cumulative Partner Plot", xlab='Partner', ylab='Fraction of Connections', ...)
}

#' Reorder a sampling curve object, optionally subsampling a given fraction
#'
#' @param x A \code{samplingcurve} object or a neuron specification that can be
#'   passed to \code{make_rand_sampling_curve}.
#' @param fraction A fraction from 0-1 specifying the proportion of connections
#'   to keep in the sample.
#'
#' @return A new \code{samplingcurve} object
#' @export
#'
#' @examples
#' scuniform=samplingcurve(sample(1:20, size=200, replace=TRUE))
#' hist(scuniform)
#' head(scuniform)
subsample <- function(x, fraction=1.0) {
  if(!inherits(x, 'samplingcurve'))
    x=make_rand_sampling_curve(x, sample = FALSE)
  if(fraction<0 || fraction>1) stop("fraction must be in range (0,1)")
  nsample=round(nrow(x)*fraction)
  if(nsample<1) stop("Sample fraction too small!")
  idxs=sample(seq.int(nrow(x)), size = nsample)

  fakedf=data.frame(post_skid=x[idxs, 'partner'])
  res=make_rand_sampling_curve(fakedf, sample = F)
  attr(res, 'N')=nrow(x)
  attr(res, 'm')=sum(unique(x$partner))
  res
}

#' @importFrom grDevices rainbow
plot_sampling_envelope <- function(x, rand.samples=20, partners="auto", ...) {
  orig <- if(inherits(x, 'samplingcurve')) x else
    make_rand_sampling_curve(x, partners = partners, sample=F)
  if(rand.samples==1) stop("Either 0 or >1 random samples!")
  rs=replicate(rand.samples, subsample(x)$new)
  plot(orig, ylim=c(0, max(c(orig$new, unlist(rs)))), ...)
  if(rand.samples==0)
    return(invisible(NULL))
  rsm=rowMeans(rs)
  lines(rsm, col='black')
  for(i in seq_len(ncol(rs))) {
    lines(rs[,i], col=rainbow(ncol(rs))[i], lty=3)
  }
}


#' Plot proportion of targets identified versus connections tested
#'
#' @param x A \code{samplingcurve} object
#' @param conn_threshold A (vector of) absolute connection thresholds (i.e.
#'   integral number of partners). A threshold of 1 (the default) implies
#'   partners with 1 or more connections (i.e. all partners).
#' @param sample Whether to randomise the sampling order
#' @inheritDotParams samplingcurve
#'
#' @export
#' @examples
#' scuniform=samplingcurve(rep(1:20,10))
#' # no randomisation, which reveals the non-random order of connections in the
#' # scuinform object
#' plot_prop_identified(scuniform)
#' # randomising partner order
#' plot_prop_identified(scuniform, sample=TRUE)
#' plot_prop_identified(scuniform, conn_threshold=1:3, sample=TRUE)
plot_prop_identified <- function(x, conn_threshold=1, sample=FALSE, ...) {
  prop_identified <- function(conn_threshold, x, sample=FALSE) {
    tt=table(x$partner)
    selected_partners=names(which(tt>conn_threshold))
    if(sample) x$partner=sample(x$partner)
    new=!duplicated(x$partner)
    new_in_selected=new & x$partner %in% selected_partners
    cs=cumsum(new_in_selected)
    cs
  }
  css=sapply(conn_threshold, prop_identified, x, sample=sample)
  if(!is.matrix(css)) css=matrix(css, ncol=1)

  css=scale(css, center=FALSE, scale = css[nrow(css),])*100
  x=seq(from=0, to=100, along.with = css[,1])
  plot(x, css[,1], type='l',
       xlab='Percent Synapses Tested',
       ylab='Percent Partners Identified', ...)
  if(ncol(css)>1) {
    for(i in seq_len(ncol(css))[-1]) {
      lines(x, css[,i], col=rainbow(ncol(css))[i], lty=3)
    }
  }
}


#' Generate replicates for uniform multivariate hypergeometric distribution
#'
#' @details \code{N,m} The number of marbles of each colour in the urn must be
#'   integral. If the total number of marbles, \code{N}, is not evenly divisible
#'   by the number of colours, \code{m}, the remaining r marbles are added one
#'   by one to the the first r colours.
#'
#' @param N number of marbles
#' @param m number of colours
#' @param k sample size
#' @param fraction Fraction of N to sample
#' @param nn Number of replicates
#' @importFrom extraDistr rmvhyper
#' @seealso \code{\link[extraDistr]{rmvhyper}}, \code{\link{rhyper}}
#' @export
urmvhyper <- function(N, m, k=NULL, fraction=NULL, nn=10e3) {
  if(is.null(k)) k=round(fraction*N)
  # what should we make the distribution of the m values of n_i?
  # should be uniform, but must also be integer
  ni=rep(as.integer(floor(N/m)), m)
  # now add 1 to as many of these as necessary
  remainder = N%%m
  if(remainder>0)
    ni[1:remainder]=ni[1:remainder]+1L
  rmvhyper(nn, n = ni, k=k)
}

negexp <- function(A, k) {function(x) A*(1-exp(-k*x))}
jefferis/emsampling documentation built on July 21, 2019, 2:20 a.m.