R/read_npx_delim.R

Defines functions get_field_separator read_npx_delim_wide read_npx_delim_long read_npx_delim

Documented in get_field_separator read_npx_delim read_npx_delim_long read_npx_delim_wide

#' Help function to read long or wide format
#' `r ansi_collapse_quot(x = get_olink_data_types(), sep = "or")` data from
#' delimited
#' `r ansi_collapse_quot(x = get_file_ext(name_sub = "delim"), sep = "or")`
#' files exported from Olink software in R.
#'
#' @description
#' The function can handle delimited files in long and wide format.
#'
#' @author
#'   Klev Diamanti
#'   Christoffer Cambronero
#'   Kathleen Nevola
#'
#' @inherit .read_npx_args params
#' @param file Path to Olink software output delimited file in wide or long
#' format. Expecting file extensions
#' `r ansi_collapse_quot(x = get_file_ext(name_sub = "delim"), sep = "or")`.
#'
#' @return Dataset,
#' `r ansi_collapse_quot(x = get_df_output_print(), sep = "or")`, with Olink
#' data in long or wide format.
#'
#' @keywords internal
#'
#' @seealso
#'   \code{\link{read_npx}}
#'   \code{\link{read_npx_parquet}}
#'   \code{\link{read_npx_zip}}
#'   \code{\link{read_npx_excel}}
#'   \code{\link{read_npx_format}}
#'
read_npx_delim <- function(file,
                           out_df = "arrow") {

  # validate arguments ----

  # abort early if the file is missing
  check_file_exists(file = file,
                    error = TRUE)

  # abort early if the requested output class is not acceptable
  check_out_df_arg(out_df = out_df)

  # first attempt: read as long format ----

  # field separator of the file
  sep <- get_field_separator(file = file)

  # The file is first read with `header = TRUE`, i.e. as if it were in long
  # format. Any condition raised by that attempt is folded into a string flag:
  #   - "warning-1": a warning containing "Stopped early on line", which has
  #     been observed for wide files that contain rows without NPX
  #     measurements;
  #   - "warning": any other warning;
  #   - "error": any error.
  # The flags are resolved further down.
  df_olink <- tryCatch(
    expr = read_npx_delim_long(file = file, sep = sep),
    warning = function(w) {
      if (grepl("Stopped early on line", w$message)) "warning-1" else "warning"
    },
    error = function(e) "error"
  )

  # decide whether a second attempt as wide format is needed ----

  # Defaults cover the case where the long attempt did not return a dataset:
  # always retry as wide, and do not silence warnings.
  retry_as_wide <- TRUE
  mute_wide_warnings <- FALSE

  if (check_is_dataset(x = df_olink, error = FALSE)) {
    # A wide file parsed with a header keeps real names only for its first two
    # columns; the remaining ones get auto-generated names V<number>. If every
    # column but two follows that pattern (and there are more than two rows),
    # the file is treated as wide.
    is_v_name <- grepl(
      pattern = "^V\\d*$",
      x = names(df_olink),
      ignore.case = FALSE
    )
    retry_as_wide <- (sum(is_v_name) == ncol(df_olink) - 2L) &&
      (nrow(df_olink) > 2L)
  } else if (df_olink == "warning-1") {
    # The "Stopped early" warning is known to trigger a follow-up warning when
    # the file is re-read as wide, so warnings of the second read are muted.
    mute_wide_warnings <- TRUE
  }

  # second attempt: read as wide format ----

  if (retry_as_wide) {
    df_olink <- tryCatch(
      expr = {
        if (mute_wide_warnings) {
          suppressWarnings(
            remove_all_na_cols(
              read_npx_delim_wide(file = file, sep = sep)
            )
          )
        } else {
          remove_all_na_cols(
            read_npx_delim_wide(file = file, sep = sep)
          )
        }
      },
      warning = function(w) "warning",
      error = function(e) "error"
    )
  }

  # abort if a condition flag is all that is left ----

  read_failed <- check_is_character(x = df_olink, error = FALSE) &&
    grepl("warning|error", df_olink)

  if (read_failed) {
    # neither the long nor the wide reader produced a dataset
    cli::cli_abort( # nolint: return_linter
      c(
        "x" = "Unable to open delimited file: {.file {file}}!",
        "i" = "Check if {.arg file} is delimited or corrupt."
      ),
      call = rlang::caller_env(),
      wrap = FALSE
    )
  }

  # sanity checks on the dataset ----

  # warn (but continue) when the dataset has fewer than 5 columns
  if (ncol(df_olink) < 5L) {
    cli::cli_warn(
      message = c(
        "i" = "The delimited file {.file {file}} has too few
        (n={.val {ncol(df_olink)}}) columns!"
      ),
      call = rlang::caller_env(),
      wrap = FALSE
    )
  }

  # verify that the top row matches what is expected for the detected format
  read_npx_format_colnames(df = df_olink, file = file)

  # convert to the requested output class and return ----

  df_olink <- convert_read_npx_output(df = df_olink,
                                      out_df = out_df)

  df_olink
}

# `read_npx_csv` is bound to the very same function object as
# `read_npx_delim`; the `@rdname` tag below documents both names on the
# `read_npx_delim` help page.
#' @rdname read_npx_delim
read_npx_csv <- read_npx_delim

#' Help function to read long format
#' `r ansi_collapse_quot(x = get_olink_data_types(), sep = "or")` data from
#' delimited
#' `r ansi_collapse_quot(x = get_file_ext(name_sub = "delim"), sep = "or")`
#' files exported from Olink software in R.
#'
#' @description
#' The function can handle delimited files in long format.
#'
#' @author
#'   Klev Diamanti
#'   Christoffer Cambronero
#'   Kathleen Nevola
#'   Ola Caster
#'
#' @inherit read_npx_delim params
#' @param sep Character separator of delimited input file. One of `NULL`
#' (default) for auto-detection, or `r ansi_collapse_quot(accepted_field_sep)`.
#' Used only for delimited output files from Olink software.
#'
#' @return Dataset, `r ansi_collapse_quot(x = "tibble")`, with Olink data in
#' long format.
#'
#' @keywords internal
#'
#' @seealso
#'   \code{\link{read_npx_delim_wide}}
#'   \code{\link{read_npx_delim}}
#'
read_npx_delim_long <- function(file,
                                sep) {

  # parse the delimited file ----

  # The first row is taken as the header; "NA" and empty fields become NA;
  # column names are kept exactly as they appear in the file; a plain
  # data.frame (not a data.table) is requested from fread.
  long_raw <- data.table::fread(
    file = file,
    sep = sep,
    header = TRUE,
    na.strings = c("NA", ""),
    stringsAsFactors = FALSE,
    check.names = FALSE,
    data.table = FALSE
  )

  # hand back a tibble ----

  long_tbl <- dplyr::as_tibble(long_raw)

  long_tbl
}

#' Help function to read wide format
#' `r ansi_collapse_quot(x = get_olink_data_types(), sep = "or")` data from
#' delimited
#' `r ansi_collapse_quot(x = get_file_ext(name_sub = "delim"), sep = "or")`
#' files exported from Olink software in R.
#'
#' @description
#' The function can handle delimited files in wide format.
#'
#' @author
#'   Klev Diamanti
#'
#' @inherit read_npx_delim params
#' @inherit read_npx_delim_long params
#'
#' @return Dataset, `r ansi_collapse_quot(x = "tibble")`, with Olink data in
#' wide format.
#'
#' @keywords internal
#'
#' @seealso
#'   \code{\link{read_npx_delim_long}}
#'   \code{\link{read_npx_delim}}
#'
read_npx_delim_wide <- function(file,
                                sep) {

  # Normalize Windows-like "\r\n" line endings to linux-like "\n" ----

  # If the file uses "\r\n" as line separator, it is rewritten with "\n" so
  # that data.table::fread can read the empty lines. The rewritten content goes
  # to a temporary file that is removed when the function exits.

  # read first line of the file (readLines drops the line terminator)
  file_line_1 <- readLines(file, n = 1L)

  # An empty file has no first line; skip the detection in that case and let
  # fread report the problem instead of failing on a zero-length condition.
  has_crlf <- FALSE
  if (length(file_line_1) == 1L) {
    # read first line plus 10 characters to extend a bit on the second line,
    # so that the raw line terminator is part of the string
    file_line_1_extended <- readChar(con = file,
                                     nchars = nchar(file_line_1) + 10L)
    has_crlf <- grepl("\r\n", file_line_1_extended)
  }

  if (has_crlf) {
    tmp_file <- tempfile()

    # Register the cleanup before the file is created, so that it is removed
    # even if reading or writing fails. unlink() is silent when the file does
    # not exist.
    on.exit(
      expr = unlink(tmp_file),
      add = TRUE
    )

    # readLines already strips the "\r\n" terminators, so no further
    # substitution is needed on the individual lines
    file_content <- readLines(con = file)

    # Write through a binary connection: in text mode writeLines converts the
    # "\n" separator to the platform default, which is "\r\n" on Windows and
    # would undo the normalization. `base::` is spelled out because the
    # argument `file` shadows the name of the connection constructor.
    tmp_con <- base::file(description = tmp_file, open = "wb")
    tryCatch(
      writeLines(text = file_content, con = tmp_con, sep = "\n"),
      finally = close(tmp_con)
    )
  } else {
    tmp_file <- file
  }

  # read wide file ----

  # No header is expected; `fill = TRUE` pads rows that have fewer fields, and
  # the separator is passed explicitly alongside it.
  df_olink <- data.table::fread(
    file = tmp_file,
    header = FALSE,
    sep = sep,
    stringsAsFactors = FALSE,
    na.strings = c("NA", ""),
    check.names = FALSE,
    data.table = FALSE,
    fill = TRUE
  ) |>
    dplyr::as_tibble() |>
    # force consecutive column names V1, V2, ..., Vn
    dplyr::rename_with(.fn = \(nm) paste0("V", seq_along(nm)))

  # return ----

  return(df_olink)

}

#' Help function to get the separator of a delimited file exported from Olink
#' software.
#'
#' @description
#' This function uses the first line of the provided file to determine the
#' separator of the file.
#'
#' \strong{Note:} The function does not allow presence of commas and semicolons
#' on the same line.
#'
#' @author
#'   Klev Diamanti
#'
#' @inherit read_npx_delim params
#'
#' @return The file delimiter
#' `r ansi_collapse_quot(x = accepted_field_sep, sep = "or")`.
#'
#' @keywords internal
#'
#' @seealso
#'   \code{\link{read_npx_delim}}
#'
get_field_separator <- function(file) {

  # check if file exists
  check_file_exists(file = file,
                    error = TRUE)

  # Single handler used for both warnings and errors raised while reading the
  # top of the file. An error signalled from inside a tryCatch() handler is
  # not caught by the sibling handlers of the same tryCatch(), so the warning
  # handler has to raise the informative error itself.
  abort_unreadable <- function(cnd) {
    cli::cli_abort( # nolint: return_linter
      c(
        "x" = "Unable to open delimited file: {.file {file}}!",
        "i" = "Check if {.arg file} is delimited or corrupt."
      ),
      call = rlang::caller_env(),
      wrap = FALSE
    )
  }

  # get the first 5 lines of the file
  file_lines <- tryCatch(
    expr = readLines(con = file, n = 5L, skipNul = FALSE),
    warning = abort_unreadable,
    error = abort_unreadable
  )

  # For each accepted delimiter, count the number of lines (out of those read)
  # that contain it at least once. vapply() guarantees an integer vector, named
  # after the delimiters, regardless of the input length.
  count_sep_occur <- vapply(
    X = accepted_field_sep,
    FUN = function(d) sum(grepl(pattern = d, x = file_lines, fixed = TRUE)),
    FUN.VALUE = integer(1L)
  )

  # no match to delimiters
  if (all(count_sep_occur == 0L)) {
    cli::cli_abort( # nolint: return_linter
      c(
        "x" = "Unable to identify the separator of the file: {.file {file}}",
        "i" = "Expected one of {.val {accepted_field_sep}}!"
      ),
      call = rlang::caller_env(),
      wrap = FALSE
    )
  }

  # Choose the delimiter present on the most lines; on ties which.max() keeps
  # the first one in the order of `accepted_field_sep`.
  best_delim <- names(x = which.max(x = count_sep_occur))

  return(best_delim)
}

Try the OlinkAnalyze package in your browser

Any scripts or data that you put into this service are public.

OlinkAnalyze documentation built on June 24, 2026, 1:06 a.m.