R/gate_data.R

Defines functions gate_facs_yeast_HTS_data gate_it plot_gating

Documented in gate_facs_yeast_HTS_data gate_it plot_gating

#
# Authors: Florent Chuffart, INSERM and Gael Yvert, CNRS
# florent.chuffart@univ-grenoble-alpes.fr
# Gael.Yvert@ens-lyon.fr
#
#---------------------------------------------
#' Interrogates the FSC/SSC density plot and extract ncell from the peak of density.
#'
#' This function works of FL1H, FSC and SSC data acquired on 96-well plates
#' @param data A list containing a field called alldat, generated by the read_data.R function
#' @param frac_cell	A fraction of cells to keep after gating (approximately). This number defines the size of the gate (few cells: narrow gate).
#' @param graphic If TRUE (default), graphics are produced in pdf files to visualize gating. The name of these files will be in the format platewell.pdf.
#' @param graphicsdir	A character string giving the path to the directory where the graphics files should be written
#' @param pre_gating_rm_rate Pre-gating fraction of cells that will be remove on x and y axis. This allows to limit computations and filtering to the area of interest. Yeast cells being small, default value is 0.05.
#' @return A list of data.frames containing the data of GATED cells. data$gdat in a list of gated data, and data$anno is the summary of the informations, it now contains the summary of gated data.
#' @export
# The main function is kdce2d() that Florent implemented in gylab package.
# This function calls kde2d from the MASS package
#
# kerneld1$inside is the gating Boolean: TRUE = we keep it, FALSE = we don't.
# kernel$cl is the contourline defining the gate.

gate_facs_yeast_HTS_data <- function(data, frac_cells=0.4, graphics=TRUE, pre_gating_rm_rate=0.05, verbose=TRUE, hard_lim=NULL){
  kerneld = lapply(1:length(data$alldat), function(i) {
    #if (i %% 10 == 1) { print(paste(i, "/", length(data$alldat), "gating..."))  }
    print(paste(i, "/", length(data$alldat), "gating..."))
    l = data$alldat[[i]]
    x = l$FSC.H
    y = l$SSC.H
    if (!is.null(hard_lim)) {
      idx = (x >= hard_lim[1] & x <= hard_lim[2] & y >= hard_lim[3] & y <= hard_lim[4] )
    } else{
      idx = rep(TRUE, length(x))
    }
    thres_1 = pre_gating_rm_rate
    thres_2 = frac_cells/(1-thres_1)
    k = gate_it(x, y, idx=idx, thres_1=thres_1, thres_2=thres_2)
    if (graphics) plot_gating(k)
    return(k)
  })
  data$gdat = lapply(1:length(kerneld), function(i) {
    k = kerneld[[i]]
    data$alldat[[i]][k$idxgate,]
  })
  data$gate = lapply(kerneld, function(k) {
    k$cl
  })
  # quality control
  foo = lapply(data$gdat, function(l){
    list(
      median_gfl1 = median(l$FL1.H),
      median_gfsc = median(l$FSC.H),
      median_gssc = median(l$SSC.H),
      sd_gfl1 = sd(l$FL1.H),
      sd_gfsc = sd(l$FSC.H),
      sd_gssc = sd(l$SSC.H),
      nb_gcells = length(l$FL1.H)
    )
  })
  foo = do.call(rbind, foo)
  foo = data.frame(lapply(data.frame(foo, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE)
  data$anno = cbind(data$anno, foo)
  return(data);
}

#---------------------------------------------
#' Generates the kerneld to gate the data
#'
#' This function works of FL1H, FSC and SSC data acquired on 96-well plates
#' @param x A vector of FSC data
#' @param y A vector of SSC data
#' @param thres_1 Pre-gating fraction of cells that will be remove on x and y axis. This allows to limit computations and filtering to the area of interest. Yeast cells being small, default value is 0.05.
#' @param thres_2	A fraction of cells to keep after gating (approximately). This number defines the size of the gate (few cells: narrow gate).
#' @return A kerneld
#' @export

gate_it = function(x, y, idx, thres_1, thres_2, n=34, DEBUG=FALSE) {
  tmp_x = x[idx]
  tmp_y = y[idx]
  # Removing saturated
  qx = quantile(tmp_x, probs=c(thres_1/2, 1 - thres_1/2))
  qy = quantile(tmp_y, probs=c(thres_1/2, 1 - thres_1/2))

  idxdesatx = tmp_x >= qx[1] & tmp_x <= qx[2]
  idxdesaty = tmp_y >= qy[1] & tmp_y <= qy[2]
  idxdesat = rep(FALSE, length(x))
  idxdesat[idx] = idxdesatx & idxdesaty

  nb_cell=sum(idxdesat)*thres_2

  x1 = x[idxdesat]
  y1 =  y[idxdesat]

  # lims = c(qx, qy)
  # print(lims)

  dx = qx[2] - qx[1]
  dy = qy[2] - qy[1]
  lims = c(qx + c(-0.1*dx,+0.1*dx), qy + c(-0.1*dy,+0.1*dy))
  # lims[which(lims <= 0)] = 1

  h = c(MASS::bandwidth.nrd(x1), MASS::bandwidth.nrd(y1))
  if (0 %in% h) {
    h[which(h == 0)] = 1
  }
  kerneld = MASS::kde2d(x1, y1, h=h, n=n, lims=lims)
  kerneld$xorig = x
  kerneld$yorig = y
  kerneld$idxorig = idx
  kerneld$xdesat = x1
  kerneld$ydesat = y1
  kerneld$idxdesat = idxdesat
  kerneld$lims = lims
  kerneld$n = n

  get_confidencebound = function(kerneld, prop=prop, DEBUG=DEBUG) {
    pp = sapply(1:length(kerneld$xdesat), function(i) {
      if (DEBUG & (i %% 10000 == 1)) {print(paste("Getting confidence bound", i, "/", length(kerneld$xdesat)))}
      z.x <- max(which(kerneld$x <= kerneld$xdesat[i]))
      z.y <- max(which(kerneld$y <= kerneld$ydesat[i]))
      return(kerneld$z[z.x, z.y])
    })
    confidencebound = quantile(pp, prop, na.rm = TRUE)
    return(confidencebound)
  }

  prop = min(1, max(0, 1-nb_cell/length(kerneld$xdesat)))
  # print(paste(nb_cell, "/", length(kerneld$xdesat), "===>", prop))
  confidencebound = get_confidencebound(kerneld, prop=prop, DEBUG=DEBUG)
  kerneld$cl = contourLines(kerneld, levels = confidencebound)

  baz = sapply(1:length(kerneld$cl), function(i) {
    poly = cbind(kerneld$cl[[i]]$x,kerneld$cl[[i]]$y)
    pts = cbind(kerneld$xdesat,kerneld$ydesat)
    inside = mgcv::in.out(poly, pts)
    return(inside)
  })
  # kerneld$inside = as.logical(apply(t(baz), 2, sum))
  bar = sapply(data.frame(baz),sum)
  idx = which(bar==max(bar))[1]
  kerneld$cl = kerneld$cl[idx]
  kerneld$inside = baz[,idx]
  tmp_idx = rep(FALSE, length(kerneld$xorig))
  tmp_idx[idxdesat] = kerneld$inside
  kerneld$idxgate = tmp_idx
  return(kerneld)
}



#---------------------------------------------
#' Plots Gating
#'
#' This function plots the gating.
#' @importFrom SpatialTools plot.contourLines
#' @export
plot_gating = function(k) {
  plot(k$xorig[k$idxorig], k$yorig[k$idxorig], pch=".")
  abline(v=k$lims[1:2])
  abline(h=k$lims[3:4])
  persp(k, phi = 30, theta = 20, d = 5)
  # plot(k$xdesat, k$ydesat, pch = ".", col= k$inside +1)
  # plot.contourLines(k$cl, add=TRUE)
  plot(k$xorig, k$yorig, pch = ".", col= k$idxgate + 1, xlim=k$lims[1:2], ylim=k$lims[3:4])
  plot.contourLines(k$cl, add=TRUE)
}
magrichard/facsticor documentation built on May 17, 2019, 8:16 a.m.