R/parser.R

Defines functions read_luminex_data postprocess_xponent postprocess_intelliflex handle_datetime parse_xponent_locations remove_non_analyte_columns find_analyte_names check_data filter_list

Documented in handle_datetime read_luminex_data

intelliflex_to_xponent_mapping <- VALID_DATA_TYPES
names(intelliflex_to_xponent_mapping) <- c(
  "MEDIAN", "COUNT", "NET.MEDIAN", "NET.AVERAGE.MEDIAN", "AVERAGE.MFI"
)
data_must_contain <- c("Median", "Count") # Median is a must have
location_column_name <- "Location"
sample_column_name <- "Sample"
non_analyte_columns <- c("Location", "Sample", "Total.Events")

filter_list <- function(lst, allowed_names) {
  lst[names(lst) %in% allowed_names]
}

check_data <- function(data) {
  for (must_contain in data_must_contain) {
    if (!(must_contain %in% names(data))) {
      stop("Could not find at least one of the following data types: ", paste(data_must_contain, collapse = ", "))
    }
    if (!(location_column_name %in% colnames(data[[must_contain]]))) {
      stop("Could not find location column in datatype: ", must_contain)
    }
  }
}

find_analyte_names <- function(data) {
  analytes <- colnames(data[])
  analytes <- analytes[!(analytes %in% non_analyte_columns)]
  analytes
}

remove_non_analyte_columns <- function(data) {
  for (data_type in names(data)) {
    df <- data[[data_type]]
    df <- df[, !(colnames(df) %in% non_analyte_columns)]
    data[[data_type]] <- df
  }
  data
}

parse_xponent_locations <- function(xponent_locations) {
  # Convert strings like this 77(1,E10) to E10
  regex <- "(\\d+?)\\((\\d+),(\\w\\d+)\\)"
  locations <- gsub(regex, "\\3", xponent_locations)
  locations
}

#' Handle differences in datetimes
#'
#' @description
#' Handle differences in the datetime format between xPONENT and INTELLIFLEX
#' and output POSIXct datetime object containing the correct datetime with the default timezone.
#'
#' @import lubridate
#'
#' @param datetime_str The datetime string to parse
#' @param file_format The format of the file. Select from: xPONENT, INTELLIFLEX
#'
#' @return POSIXct datetime object
#'
#' @keywords internal
#'
handle_datetime <- function(datetime_str, file_format = "xPONENT") {
  if (file_format == "xPONENT") {
    possible_orders <- c(
      "mdY IM p", "mdY HM", "mdY IMS p", "mdY HMS",
      "Ymd IM p", "Ymd HM", "Ymd IMS p", "Ymd HMS",
      "dmY IM p", "dmY HM", "dmY IMS p", "dmY HMS"
    )
  } else if (file_format == "INTELLIFLEX") {
    possible_orders <- c(
      "Ymd IMS p", "Ymd HMS", "Ymd IM p", "Ymd HM",
      "mdY IMS p", "mdY HMS", "mdY IM p", "mdY HM",
      "dmY IMS p", "dmY HMS", "dmY IM p", "dmY HM"
    )
  } else {
    stop("Invalid file format: ", file_format)
  }

  first_attempt <- lubridate::parse_date_time2(datetime_str, orders = possible_orders[1], tz = "")
  if (!is.na(first_attempt)) {
    return(first_attempt)
  } else {
    message("Could not parse datetime string using default datetime format. Trying other possibilies.")
    for (order in possible_orders[-1]) {
      datetime <- lubridate::parse_date_time2(datetime_str, orders = order, tz = "")
      if (!is.na(datetime)) {
        message("Successfully parsed datetime string using order: ", order)
        return(datetime)
      }
    }
    stop("Could not parse datetime string: ", datetime_str)
  }
}

postprocess_intelliflex <- function(intelliflex_output, verbose = TRUE) {
  data <- intelliflex_output$Results
  names(data) <- intelliflex_to_xponent_mapping[names(data)]
  data <- filter_list(data, VALID_DATA_TYPES)
  check_data(data)

  datetime_str <- intelliflex_output$SystemMetadata[["PLATE.START"]]
  plate_datetime <- handle_datetime(datetime_str, "INTELLIFLEX")

  analyte_names <- find_analyte_names(data$Median)
  data <- remove_non_analyte_columns(data)

  list(
    batch_name = intelliflex_output$SystemMetadata[["PLATE.NAME"]],
    sample_locations = intelliflex_output$SampleMetadata[["WELL.LOCATION"]],
    sample_names = intelliflex_output$SampleMetadata[["SAMPLE.ID"]],
    plate_datetime = plate_datetime,
    analyte_names = analyte_names,
    data = data,
    batch_info = intelliflex_output$SampleMetadata
  )
}

postprocess_xponent <- function(xponent_output, verbose = TRUE) {
  data <- xponent_output$Results
  data <- filter_list(data, VALID_DATA_TYPES)
  check_data(data)

  xponent_locations <- data$Median[[location_column_name]]
  sample_locations <- parse_xponent_locations(xponent_locations)
  sample_names <- data$Median[[sample_column_name]]
  analyte_names <- find_analyte_names(data$Median)
  data <- remove_non_analyte_columns(data)

  datetime_str <- paste(
    xponent_output$ProgramMetadata[["Date"]],
    xponent_output$ProgramMetadata[["Time"]]
  )
  plate_datetime <- handle_datetime(datetime_str, "xPONENT")

  list(
    batch_name = xponent_output$ProgramMetadata[["Batch"]],
    plate_datetime = plate_datetime,
    sample_locations = sample_locations,
    sample_names = sample_names,
    analyte_names = analyte_names,
    data = data,
    batch_info = xponent_output$Header
  )
}


valid_formats <- c("xPONENT", "INTELLIFLEX")

#' Read Luminex Data
#'
#' @description
#' Reads a file containing Luminex data and returns a Plate object.
#' If provided, can also read a layout file, which usually contains
#' information about the sample names, sample types or its dilutions.
#'
#' The function is capable of reading data in two different formats:
#' - xPONENT
#' - INTELLIFLEX
#' which are produced by two different Luminex machines.
#'
#'
#' @param plate_filepath Path to the Luminex plate file
#' @param layout_filepath Path to the Luminex layout file
#' @param format The format of the Luminex data. Select from: xPONENT, INTELLIFLEX
#' @param plate_file_separator The separator used in the plate file
#' @param plate_file_encoding The encoding used in the plate file
#' @param use_layout_sample_names Whether to use names from the layout file in extracting sample names.
#' @param use_layout_types Whether to use names from the layout file in extracting sample types.
#' Works only when layout file is provided
#' @param use_layout_dilutions Whether to use dilutions from the layout file in extracting dilutions.
#' Works only when layout file is provided
#' @param default_data_type The default data type to use if none is specified
#' @param sample_types a vector of sample types to use instead of the extracted ones
#' @param dilutions a vector of dilutions to use instead of the extracted ones
#' @param verbose Whether to print additional information and warnings. `TRUE` by default
#'
#' @return Plate file containing the Luminex data
#'
#' @examples
#' plate_file <- system.file("extdata", "CovidOISExPONTENT.csv", package = "PvSTATEM")
#' layout_file <- system.file("extdata", "CovidOISExPONTENT_layout.csv", package = "PvSTATEM")
#' plate <- read_luminex_data(plate_file, layout_file)
#'
#' plate_file <- system.file("extdata", "CovidOISExPONTENT_CO.csv", package = "PvSTATEM")
#' layout_file <- system.file("extdata", "CovidOISExPONTENT_CO_layout.xlsx", package = "PvSTATEM")
#' # To suppress warnings and additional information use verbose = FALSE
#' plate <- read_luminex_data(plate_file, layout_file, verbose = FALSE)
#'
#' @export
read_luminex_data <- function(plate_filepath,
                              layout_filepath = NULL,
                              format = "xPONENT",
                              plate_file_separator = ",",
                              plate_file_encoding = "UTF-8",
                              use_layout_sample_names = TRUE,
                              use_layout_types = TRUE,
                              use_layout_dilutions = TRUE,
                              default_data_type = "Median",
                              sample_types = NULL,
                              dilutions = NULL,
                              verbose = TRUE) {
  if (!(format %in% valid_formats)) {
    stop("Invalid format: ", format, ". Select from: ", paste(valid_formats, collapse = ", "))
  }

  verbose_cat("Reading Luminex data from: ", plate_filepath, "\nusing format ", format, "\n", verbose = verbose)

  parser_output <- switch(format,
    "xPONENT" = {
      output <- read_xponent_format(plate_filepath, verbose = verbose)
      postprocess_xponent(output, verbose = verbose)
    },
    "INTELLIFLEX" = {
      output <- read_intelliflex_format(plate_filepath, verbose = verbose)
      postprocess_intelliflex(output, verbose = verbose)
    }
  )

  plate_builder <- PlateBuilder$new(
    batch_name = parser_output$batch_name,
    sample_names = parser_output$sample_names,
    analyte_names = parser_output$analyte_names,
    verbose = verbose
  )

  plate_builder$set_plate_name(plate_filepath) # set a new plate name based on the file name
  plate_builder$set_plate_datetime(parser_output$plate_datetime)

  plate_builder$set_sample_locations(parser_output$sample_locations)
  layout_matrix <- NULL
  if (!is.null(layout_filepath)) {
    layout_matrix <- read_layout_data(layout_filepath)
    plate_builder$set_layout(layout_matrix)
  }
  if (is.null(layout_filepath) && (use_layout_types || use_layout_dilutions || use_layout_sample_names)) {
    use_layout_types <- FALSE
    use_layout_sample_names <- FALSE
    use_layout_dilutions <- FALSE
    verbose_cat(
      "(",
      color_codes$red_start,
      "WARNING",
      color_codes$red_end,
      ")",
      "\nLayout file not provided. Setting `use_layout_sample_names`,
      `use_layout_types` and `use_layout_dilutions` to FALSE.\n",
      verbose = verbose
    )
  }

  # Setting of samples names has to happen before setting sample types
  plate_builder$set_sample_names(use_layout_sample_names)
  plate_builder$set_sample_types(use_layout_types, sample_types)

  plate_builder$set_batch_info(parser_output$batch_info)
  plate_builder$set_default_data_type(default_data_type)
  plate_builder$set_data(parser_output$data)

  plate_builder$set_dilutions(use_layout_dilutions, dilutions)


  plate <- plate_builder$build(validate = TRUE)

  verbose_cat(color_codes$green_start, "\nNew plate object has been created with name: ",
    plate$plate_name, "!\n", color_codes$green_end, "\n",
    verbose = verbose
  )

  plate
}

Try the PvSTATEM package in your browser

Any scripts or data that you put into this service are public.

PvSTATEM documentation built on April 4, 2025, 4:34 a.m.