R/processFlowjoExportDir.R

Defines functions processFlowjoExportDir

Documented in processFlowjoExportDir

#' Function to batch process .csv files (whose names contain the substring specified by 'csvstring') exported from FlowJo within the directory specified by 'dir'. The directory must contain 'platesetupfile' (a tab-delimited .txt file) with a column named IDvar (defaults to 'Sample') holding the 96-well IDs that are matched to the $FIL keyword in the .fcs file exported from the Attune.
#' Outputs a tab-delimited .txt file with the file name specified by 'fileout'.
#' 2023-06-09: added the fcsheader option to denote whether the exported csv files contain FCS headers. If FALSE, IDvar is parsed by removing csvstring from the file name. Also modified flowjo2df to accept an IDvar argument.
#'
#' Process a directory of FlowJo Export files
#'
#' This function reads a directory of FlowJo Export csv files, asserts that there are matching files for all annotations,
#' combines all the csv files into a single data frame, joins the combined data frame with a plate setup file (annotations),
#' and writes the output to a text file.
#' Warning: the roxygen documentation below was generated by ChatGPT and has not been checked.
#' @param dir A character string representing the directory that contains the FlowJo Export csv files.
#' @param csvstring A character string representing the pattern to match in the csv file names (default = '_P1.csv').
#' @param fileout A character string representing the output filename (default = 'P1_combodf.txt').
#' @param platesetupfile A character string representing the plate setup file name (default = 'platesetup.txt').
#' @param IDvar A character string representing the unique identifier variable/column name in the plate setup file (default = 'Sample').
#' @param fcsheader A logical indicating whether the csv files have Flow Cytometry Standard (FCS) headers (default = TRUE).
#'
#' @return A data frame that contains the combined data of all csv files and annotations from the plate setup file.
#' The output data frame is also written as a tab-delimited text file in the provided directory.
#'
#' @examples
#' \dontrun{
#'   processFlowjoExportDir(dir = "path/to/your/directory", csvstring = '_P1.csv',
#'                          fileout = 'P1_combodf.txt', platesetupfile = 'platesetup.txt',
#'                          IDvar = 'Sample', fcsheader = TRUE)
#' }
#' @importFrom assertthat assert_that
#' @importFrom data.table fread
#' @importFrom dplyr mutate bind_rows inner_join
#' @importFrom stringr str_remove
#' @export

processFlowjoExportDir <- function(dir,
                                   csvstring = '_P1.csv',
                                   fileout = 'P1_combodf.txt',
                                   platesetupfile = 'platesetup.txt',
                                   IDvar = 'Sample',
                                   fcsheader = TRUE) {
  # Reads every file in `dir` whose name matches `csvstring`, stacks them into
  # one table, inner-joins that table with the plate setup annotations on the
  # `IDvar` column, writes the result to `file.path(dir, fileout)` as a
  # tab-delimited file, and returns the joined table.

  setup_path <- file.path(dir, platesetupfile)
  assertthat::assert_that(
    file.exists(setup_path),
    msg = paste0(platesetupfile, ' does not exist')
  )

  annodf <- data.table::fread(setup_path)

  # The join key must be present exactly once among the annotation columns.
  assertthat::assert_that(
    sum(names(annodf) == IDvar) == 1,
    msg = paste0(IDvar, ' must be a unique column name in ', platesetupfile)
  )

  # NOTE: `csvstring` is handed to grepl() (and to str_remove() below) with
  # their default settings, i.e. it is interpreted as a regular expression.
  dir_files <- list.files(dir)
  csvfiles <- dir_files[grepl(csvstring, dir_files)]

  assertthat::assert_that(
    length(csvfiles) > 0,
    msg = 'no matching csv files in directory'
  )

  # Loop-invariant: decide once how each file is parsed.
  has_fcs_header <- fcsheader == TRUE

  # Reads a single export file and returns it as a table carrying `IDvar`.
  read_one <- function(csvfile) {
    # Report progress before reading so a failing file is identifiable.
    print(paste0('working on ', csvfile, ' in ', dir))
    csv_path <- file.path(dir, csvfile)

    if (has_fcs_header) {
      return(bears01::flowjo2df(csv_path, IDvar = IDvar))
    }

    # Without FCS headers the sample ID is derived from the file name.
    sample_id <- stringr::str_remove(string = csvfile, pattern = csvstring)

    # Values are injected with `!!` so that a csv column that happens to share
    # a name with a local variable cannot mask it; `!!IDvar :=` names the new
    # column from the string without needing rlang::sym() in scope.
    dplyr::mutate(
      data.table::fread(csv_path),
      filename = !!csvfile,
      !!IDvar := !!sample_id
    )
  }

  filelist <- lapply(csvfiles, read_one)

  # bind_rows() stacks the tables even when their columns differ.
  dfout <- dplyr::mutate(dplyr::bind_rows(filelist), csvstring = !!csvstring)

  # Report annotations that have no matching csv data; these rows are dropped
  # by the inner join below.
  anno_ids <- annodf[[IDvar]]
  unmatched <- anno_ids[!(anno_ids %in% dfout[[IDvar]])]
  if (length(unmatched) > 0) {
    message(paste0(
      'these sample annotations do not match any $FIL fields: ',
      toString(unmatched)
    ))
  }

  dfout <- dplyr::inner_join(annodf, dfout, by = IDvar)

  write.table(
    dfout,
    file.path(dir, fileout),
    sep = '\t',
    col.names = TRUE,
    row.names = FALSE,
    quote = FALSE
  )

  dfout
}
chris-hsiung/bears01 documentation built on April 9, 2024, 2:01 a.m.