R/pre_process_fcs.R

Defines functions pre_process_fcs query_extract

Documented in pre_process_fcs query_extract

#' Extract and aggreagte fluorescence intensity matrixx of FCS files.
#' Perfom logicle transformation and scaling, downsampling if necessary
#'
#' @importFrom flowCore read.flowSet
#' @importFrom flowCore pData
#' @importFrom flowCore fsApply
#' @importFrom flowCore estimateLogicle
#' @importFrom flowCore transform
#' @importFrom flowCore parameters
#' @importFrom flowCore exprs
#' @importFrom flowCore identifier
#' @export
#' @param fcs_dir directory or vector containing fcs files to be used
#' @param downsampling number of event to randomly select from each fcs, if the number of events request is bigger than the number of event in the  fcs, all event are selected
#' @param rescale_all vector of two values indicating the range of the values to scale between
#'
#' @return a list containing the normalized aggregated dataframe and all_channels
#'

#Open fcs and put then in a flowset
pre_process_fcs <- function(fcs_dir,downsampling="none",rescale_all=c(0,4.5)){
  fs <- read.flowSet(fcs_dir,transformation = F,emptyValue = F)
  #get markers
  all_channels <- pData(parameters(fs[[1]]))[,c(1,2)]
  all_channels <- all_channels[as.vector(all_channels[,1] != "Time" & all_channels[,1] != "Event"),]
  shape_marker <- grep('FSC|SSC',all_channels$name,value = T)
  #transform values
  event_for_each_sample <- fsApply(fs,function(ff){
    lgcl <- estimateLogicle(ff, channels = setdiff(all_channels[,1],shape_marker),type="data")
    ff <- transform(ff,lgcl)
    mat <- data.frame(exprs(ff),check.names = F)
    #linear scale for scatter values
    if(!is.null(shape_marker)){
      mat[,shape_marker]<- sapply(shape_marker,function(x) norm_range(mat[,x,drop=F]))
    }
    mat[,"sample_id"] <-identifier(ff)
    return(mat)
  })
  if(downsampling !="none"){
    event_for_each_sample <- lapply(event_for_each_sample,function(x){
      if(nrow(x)>1000 & nrow(x) > downsampling){
        x[sample(nrow(x),downsampling),]
      }else{
        return(x)
      }
    })
  }
  processed_fcs_df <- data.frame(do.call("rbind", event_for_each_sample),check.names = F)
  if(!is.null(rescale_all)){
    processed_fcs_df[,all_channels[,1]]<- apply(processed_fcs_df[,all_channels[,1]],2,function(x) norm_range(x,rescale_all))
  }
  return(list("processed_fcs" =processed_fcs_df,"all_channels"=all_channels))
}
#' Query an aggregated dataframe to subset requested channels
#' @param processed_fcs_obj list containing a datraframe of processed intensities for each event and informations of channel used
#' @param channels vector containing channels to select. Can be "all" to select all channels, "with_desc" to select channels with a marker description or a vector a channels.
#' @export
query_extract <- function(processed_fcs_obj,channels=c("all","with_desc")[1]){
  all_channels <- processed_fcs_obj$all_channels
  processed_fcs_df <- processed_fcs_obj$processed_fcs
  #if channels are not specified
  if (all(channels == "with_desc")) {
    channels_to_select <- all_channels[!is.na(all_channels[, "desc"]), 1]
    processed_fcs_df <- processed_fcs_df[, c(channels_to_select, "sample_id")]
    channels <- colnames(processed_fcs_df)[colnames(processed_fcs_df) != "sample_id"]
  }
  if (all(channels != "with_desc") & all(channels != "all")) {
    processed_fcs_df <- processed_fcs_df[, c(channels, "sample_id")]
    channels <- colnames(processed_fcs_df)[colnames(processed_fcs_df) != "sample_id"]
  } else{
    channels <- all_channels[,1]
  }
  processed_fcs_df <- processed_fcs_df[,c(channels,"sample_id")]
  attr(processed_fcs_df,"all_channels") <- all_channels
  return(processed_fcs_df)
}
maxmeyl/excyte_1.0 documentation built on March 7, 2020, 2:01 a.m.