# R/bruker.R


# These functions deal with reading Bruker NMR samples.


# Authors: Sergio Oller, Ivan Montoliu
# This code is a port of MATLAB routines written by Ivan Montoliu and Ziad Ramadan.
# The NMR import core functions are based on MATLAB code written by Nils Nyberg
# and the nmrglue python package.
#
# The nmrglue python package does seem to have a more comprehensive set of
# import routines for NMR data (and in particular for Bruker data).
# http://www.nmrglue.com/


#' Reads a BRUKER parameter file to a list
#'
#' @param file_name File name of a BRUKER parameter file
#' @return list of parameter-value pairs
#' @keywords internal
#' @noRd
read_bruker_param <- function(file_name) {
    lines <- readLines(file_name)
    # There are vectors, values, stamps... many types of values
    # We will use regular expressions to detect each type of value and to
    # extract the relevant field name / field value.
    type_of_row <- rep(NA_integer_, length(lines))
    all_matches <- vector("list", length(lines))

    # Definition of the regular expressions and the type name.
    # The order is important: once a line matches a pattern, the remaining
    # patterns are no longer tried for that line.
    R <- as.data.frame(matrix(
        c(
            "^##\\$*(.+)=\\s?\\(\\d\\.\\.\\d+\\)(.+)$",
            "ParVecVal",
            "^##\\$*(.+)=\\s?\\(\\d\\.\\.\\d+\\)$",
            "ParVec",
            "^##\\$*(.+)=\\s?(.+)$",
            "ParVal",
            "^([^\\$#].*)$",
            "Val",
            "^\\$\\$(.+)$",
            "Stamp",
            "^##\\$*(.+)=$",
            "ParEmpty",
            "^\\s*$",
            "Empty"
        ),
        ncol = 2,
        byrow = TRUE
    ))
    colnames(R) <- c("pattern", "name")

    # Match each pattern to all lines. If there is a match and there was no
    # previous match then save the matching result.
    for (pat_idx in seq_len(nrow(R))) {
        lines_matched <-
            stringr::str_match_all(lines, R[["pattern"]][pat_idx])
        lines_with_match <- purrr::map_int(lines_matched, nrow) > 0
        new_lines_with_match <-
            is.na(type_of_row) & lines_with_match
        type_of_row[new_lines_with_match] <- pat_idx
        # Update the new lines that have match:
        all_matches[new_lines_with_match] <-
            lines_matched[new_lines_with_match]
    }
    # Use "ParVec"-like names instead of numbers, it is easier to read:
    if (anyNA(type_of_row)) {
        lines_no_match <- which(is.na(type_of_row))
        stop("Line without a match:", paste(lines_no_match, collapse = " "))
    }
    type_of_row <- R[["name"]][type_of_row]
    # Generate the output list. We will use the matched information of each line
    output <- list()
    for (i in seq_len(length(type_of_row))) {
        if (type_of_row[i] == "ParVecVal") {
            field_name <- all_matches[[i]][1, 2]
            output[[field_name]] <- all_matches[[i]][1, 4]
        } else if (type_of_row[i] == "ParVec") {
            field_name <- all_matches[[i]][1, 2]
            output[[field_name]] <- NULL
        } else if (type_of_row[i] == "ParVal") {
            field_name <- all_matches[[i]][1, 2]
            output[[field_name]] <- all_matches[[i]][1, 3]
        } else if (type_of_row[i] == "Val") {
            if (is.null(output[[field_name]])) {
                output[[field_name]] <- all_matches[[i]][1, 2]
            } else {
                output[[field_name]] <-
                    paste(output[[field_name]], all_matches[[i]][1, 2])
            }
        } else if (type_of_row[i] == "Stamp") {
            output[["Stamp"]] <- c(output[["Stamp"]], all_matches[[i]][1, 2])
        } else if (type_of_row[i] == "ParEmpty") {
            field_name <- all_matches[[i]][1, 2]
            output[[field_name]] <- NULL
        } else if (type_of_row[i] == "Empty") {
            # do nothing
        } else {
            # If this happens, either the R pattern table above has a problem or
            # a case is not handled here.
            stop("This should not have happened!")
        }
    }

    for (field in names(output)) {
        output[[field]] <- convert_field(output[[field]])
    }
    return(output)
}
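
# Minimal usage sketch for read_bruker_param(). The parameter lines below are a
# made-up JCAMP-DX style snippet (not a real acqus file), just to illustrate the
# value types handled above. Wrapped in `if (FALSE)` so it never runs on load:
if (FALSE) {
    example_param_file <- tempfile()
    writeLines(
        c(
            "##TITLE= Parameter file", # ParVal   -> character
            "##$SW_p= 12019.23", # ParVal   -> numeric
            "##$D= (0..2)", # ParVec   -> filled by the next line
            "0 0.001 0.002", # Val      -> c(0, 0.001, 0.002)
            "$$ 2021-01-01 10:00:00 user", # Stamp
            "##END=" # ParEmpty
        ),
        example_param_file
    )
    read_bruker_param(example_param_file)
}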

convert_field <- function(element) {
    # Convert values to numeric if possible
    converted_val <- element
    tryCatch(
        {
            # First remove spaces
            cleaned_spaces <- stringr::str_trim(element)
            if (nchar(cleaned_spaces[1]) == 0) {
                return(cleaned_spaces)
            }
            # Then make sure there is a space between numbers (it's apparently optional):
            cleaned_spaces <-
                gsub("([0-9.])([+-])", "\\1 \\2", cleaned_spaces)
            # Then split by one or more spaces
            converted_val2 <-
                strsplit(cleaned_spaces, split = "[[:blank:]]+")[[1]]
            converted_val <- as.numeric(converted_val2)
        },
        warning = function(warn) {
            # Do nothing
        },
        error = function(err) {
            # Do nothing
        }
    )
    # If it is still a string, remove leading < and trailing >
    if (is.character(converted_val) && length(converted_val) == 1) {
        if (grepl(pattern = "^\\s*<.*>\\s*$", x = converted_val)) {
            converted_val <-
                gsub(
                    pattern = "^\\s*<(.*)>\\s*$",
                    replacement = "\\1",
                    x = converted_val
                )
        }
    }
    return(converted_val)
}
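
# Illustration of the conversions above (made-up input strings, wrapped in
# `if (FALSE)` so it is never executed at load time):
if (FALSE) {
    convert_field("  12019.23 ") # 12019.23
    convert_field("0 0.001-0.002") # c(0, 0.001, -0.002)
    convert_field("<CPMG_1D.experiment>") # "CPMG_1D.experiment" (angle brackets removed)
    convert_field("Parameter file") # left as a character string
}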


#' Reads a Bruker procs file
#'
#' @param full_pdata_path The path to the procs files
#' @param procs_files A character vector with the procs file names. If
#'                    \code{NULL}, sensible default names are tested.
#' @return a list with a list of parameters for each procs file given
#' @keywords internal
#' @noRd
read_procs_file <- function(full_pdata_path, procs_files = NULL) {
    if (is.null(procs_files)) {
        procs_files <- c("procs", "proc2s", "proc3s", "proc4s")
        procs_files <-
            procs_files[file.exists(file.path(full_pdata_path, procs_files))]
    }
    output <- list()
    for (file in procs_files) {
        output[[file]] <-
            read_bruker_param(file_name = file.path(full_pdata_path, file))
    }
    return(output)
}

#' Reads all common acqus files
#'
#' @param sample_path A directory that corresponds to a sample
#' @param acqus_files The acqus files that you want to load. If \code{NULL} the
#'                    default set of acqus files is used.
#' @return a list with a list of parameters for each acqus file given
#' @keywords internal
#' @noRd
read_acqus_file <- function(sample_path, acqus_files = NULL) {
    if (is.null(acqus_files)) {
        acqus_files <- c("acqus", "acqu2s", "acqu3s", "acqu4s")
        acqus_files <-
            acqus_files[file.exists(file.path(sample_path, acqus_files))]
    }
    output <- list()
    for (file in acqus_files) {
        output[[file]] <-
            read_bruker_param(file_name = file.path(sample_path, file))
    }
    return(output)
}


read_levels <-
    function(full_pdata_path,
    endian = NULL,
    NC_proc = NULL) {
        levels_vec <- NULL
        # Read the level file if it exists
        # The old version (level) is a binary file
        file_level <- file.path(full_pdata_path, "level")
        if (file.exists(file_level)) {
            if (is.null(endian) || is.null(NC_proc)) {
                warning("Can't read old level file without endian information and NC_proc")
            }
            lev <-
                read_bin_data(file_name = file_level, endian = endian)
            # The first two figures are the number of pos. and neg. levels
            levels_vec <- lev[3:length(lev)]
            # Adjust for NC-parameter
            levels_vec <- levels_vec / (2^-NC_proc)
        } else {
            levels_vec <- NULL
        }

        # Read the clevels file if it exists
        # The new version (clevels) is a text file
        file_clevel <- file.path(full_pdata_path, "clevels")
        if (file.exists(file_clevel)) {
            clev <- read_bruker_param(file_name = file_clevel)
            if (clev[["LEVSIGN"]] == 1) {
                levels_vec <- clev[["LEVELS"]][clev[["LEVELS"]] > 0]
            } else if (clev[["LEVSIGN"]] == 2) {
                levels_vec <- clev[["LEVELS"]][clev[["LEVELS"]] < 0]
            } else if (clev[["LEVSIGN"]] == 3) {
                levels_vec <- clev$LEVELS[seq_len(clev[["MAXLEV"]]) * 2]
            } else {
                # this case was omitted in the MATLAB function, so maybe the
                # stop can be ignored. If you reach this point please check carefully.
                stop("Unexpected clevels case")
            }
        }
        # Check that levels is not one (large) scalar.
        if (length(levels_vec) == 1) {
            levels_vec <- c(levels_vec, levels_vec)
        }
        return(list(levels = levels_vec))
    }

#' Read binary Bruker NMR data
#'
#' @param file_name The file name to read
#' @param endian Passed to \code{readBin}.
#' @keywords internal
#' @noRd
read_bin_data <- function(file_name, endian) {
    tryCatch(
        {
            con <- file(file_name, "rb")
            # A typical file is 512 KiB, i.e. 131072 4-byte integers. By requesting
            # 131073 integers per read, a read that returns fewer integers than
            # requested signals that the end of the file has been reached.
            chunksize <- 131073
            num_reads <- 1
            data <- readBin(
                con,
                what = "integer",
                n = chunksize,
                size = 4,
                signed = TRUE,
                endian = endian
            )
            while (length(data) == num_reads * chunksize) {
                data <- c(
                    data,
                    readBin(
                        con,
                        what = "integer",
                        n = chunksize,
                        size = 4,
                        signed = TRUE,
                        endian = endian
                    )
                )
                num_reads <- num_reads + 1
            }
        },
        finally = {
            close(con)
        }
    )
    return(data)
}
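
# Minimal round-trip sketch for read_bin_data() using a hypothetical temporary
# file (wrapped in `if (FALSE)` so it never runs on load). Processed Bruker
# files such as "1r" or "2rr" are read above as 4-byte signed integers:
if (FALSE) {
    tmp_bin <- tempfile()
    writeBin(c(1L, -2L, 300000L), tmp_bin, size = 4, endian = "little")
    read_bin_data(tmp_bin, endian = "little") # c(1, -2, 300000)
}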



guess_shape_and_submatrix_shape <- function(sample) {
    output <-
        list(
            dimension = NULL,
            shape = NULL,
            submatrix_shape = NULL
        )
    if (!"procs" %in% names(sample)) {
        return(output)
    }
    procs <- sample[["procs"]]
    if (!all(c("SI", "XDIM") %in% names(procs))) {
        return(output)
    }
    si_0 <- procs[["SI"]]
    xdim_0 <- procs[["XDIM"]]

    if (!"proc2s" %in% names(sample)) {
        # 1D data
        output[["dimension"]] <- 1
        output[["shape"]] <- si_0
        output[["submatrix_shape"]] <- xdim_0
        return(output)
    }

    proc2s <- sample[["proc2s"]]
    if (!all(c("SI", "XDIM") %in% names(proc2s))) {
        return(output)
    }

    si_1 <- proc2s[["SI"]]
    xdim_1 <- proc2s[["XDIM"]]


    if (!"proc3s" %in% names(sample)) {
        # 2D data
        output[["dimension"]] <- 2
        output[["shape"]] <- c(si_0, si_1)
        output[["submatrix_shape"]] <- c(xdim_0, xdim_1)
        return(output)
    }


    proc3s <- sample[["proc3s"]]
    if (!all(c("SI", "XDIM") %in% names(proc3s))) {
        return(output)
    }

    si_2 <- proc3s[["SI"]]
    xdim_2 <- proc3s[["XDIM"]]

    if (!"proc4s" %in% names(sample)) {
        # 3D data
        output[["dimension"]] <- 3
        output[["shape"]] <- c(si_0, si_1, si_2)
        output[["submatrix_shape"]] <- c(xdim_0, xdim_1, xdim_2)
        return(output)
    }


    proc4s <- sample[["proc4s"]]
    if (!all(c("SI", "XDIM") %in% names(proc4s))) {
        return(output)
    }

    si_3 <- proc4s[["SI"]]
    xdim_3 <- proc4s[["XDIM"]]
    # Assume 4D
    output[["dimension"]] <- 4
    output[["shape"]] <- c(si_0, si_1, si_2, si_3)
    output[["submatrix_shape"]] <- c(xdim_0, xdim_1, xdim_2, xdim_3)
    return(output)
}
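
# Example of the shape inference above, using a hand-made `sample` list that
# mimics the "procs"/"proc2s" entries returned by read_procs_file(). The SI and
# XDIM values are illustrative; wrapped in `if (FALSE)` so it never runs on load:
if (FALSE) {
    fake_2d_sample <- list(
        procs = list(SI = 2048, XDIM = 1024),
        proc2s = list(SI = 256, XDIM = 64)
    )
    guess_shape_and_submatrix_shape(fake_2d_sample)
    # dimension: 2, shape: c(2048, 256), submatrix_shape: c(1024, 64)
}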

#' Read orig Bruker NMR file
#' @param sample_path A character path of the sample directory
#' @return a list with name-value pairs
#' @keywords internal
#' @noRd
read_orig_file <- function(sample_path) {
    orig_file <- file.path(sample_path, "orig")
    if (!file.exists(orig_file)) {
        return(NULL)
    }
    orig_lines <- readLines(orig_file)
    lines_split <- strsplit(orig_lines, split = " ", fixed = TRUE)
    all_names <- purrr::map_chr(lines_split, 1)
    all_vals <- purrr::map_chr(lines_split, function(line) {
        paste0(line[2:length(line)], collapse = " ")
    })
    output <- list()
    output[all_names] <- all_vals
    return(output)
}

parse_title_file <- function(title_lines) {
    # Two possible layouts:
    #     (a) Each line contains "Name Value" (optionally followed by " ;")
    #     (b) Each line contains just "Value" (no names)
    # Which layout do we have? Let's guess:
    lines_split <- strsplit(title_lines, split = " ", fixed = TRUE)
    number_of_fields_per_line <- purrr::map_int(lines_split, length)

    # Case (a): (and empty lines are accepted)
    if (all(number_of_fields_per_line >= 2)) {
        all_names <- purrr::map_chr(lines_split, 1)
        all_vals <- purrr::map_chr(lines_split, function(line) {
            value <- paste0(line[2:length(line)], collapse = " ")
            # Remove spaces and ";" at the end of the value, if they are present:
            gsub(
                pattern = "[\\s;]+$",
                replacement = "",
                x = value
            )
        })
        output <- list()
        output[all_names] <- all_vals
        return(output)
    }
    # Case (b):
    all_names <- paste0("V", seq_len(length(title_lines)))
    output <- as.list(title_lines)
    names(output) <- all_names
    return(output)
}
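
# Illustration of the two title-file layouts handled above (made-up lines,
# wrapped in `if (FALSE)` so it is never executed at load time):
if (FALSE) {
    # (a) "Name Value" lines become a named list:
    parse_title_file(c("Sample plasma-10 ;", "Solvent D2O"))
    # list(Sample = "plasma-10", Solvent = "D2O")

    # (b) Free-text lines get generic names V1, V2, ...:
    parse_title_file(c("plasma-10", "collected 2020-03-01"))
    # list(V1 = "plasma-10", V2 = "collected 2020-03-01")
}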

#' Read pdata title Bruker NMR file
#' @param sample_path A character path of the sample directory
#' @param pdata_path Path from `sample_path` to the preprocessed data
#' @return a list with name-value pairs. If the title file has no field names,
#'                                 then fields are named V1, V2...
#' @keywords internal
#' @noRd
read_pdata_title_file <-
    function(sample_path, pdata_path = "pdata/1") {
        title_file <- file.path(sample_path, pdata_path, "title")
        if (!file.exists(title_file)) {
            return(NULL)
        }
        title_lines <- readLines(title_file, warn = FALSE)
        return(parse_title_file(title_lines))
    }

#' Read processed Bruker NMR data
#'
#' @param pdata_file File name of the binary NMR data to load. Usually "1r".
#'                   If `NULL`, it is autodetected based on the dimension
#' @param sample_path A character path of the sample directory
#' @param pdata_path Path from `sample_path` to the preprocessed data
#' @param all_components If `FALSE` load only the real component. Otherwise load the real and imaginary components
#' @param read_pdata_title If `TRUE` also reads metadata from pdata title file.
#' @return A list with Bruker NMR processed data
#' @keywords internal
read_bruker_pdata <- function(sample_path,
    pdata_file = NULL,
    pdata_path = "pdata/1",
    all_components = FALSE,
    read_pdata_title = TRUE) {
    full_pdata_path <- file.path(sample_path, pdata_path)
    if (is.null(pdata_file)) {
        # determine the dimension
        if (file.exists(file.path(full_pdata_path, "1r"))) {
            if (all_components) {
                pdata_file <- c("1r", "1i")
            } else {
                pdata_file <- "1r"
            }
        } else if (file.exists(file.path(full_pdata_path, "2rr"))) {
            if (all_components) {
                pdata_file <- c("2rr", "2ri", "2ir", "2ii")
            } else {
                pdata_file <- "2rr"
            }
        } else if (file.exists(file.path(full_pdata_path, "3rrr"))) {
            if (all_components) {
                pdata_file <- c(
                    "3rrr",
                    "3rri",
                    "3rir",
                    "3rii",
                    "3irr",
                    "3iri",
                    "3iir",
                    "3iii"
                )
            } else {
                pdata_file <- "3rrr"
            }
        }
        pdata_file <-
            pdata_file[file.exists(file.path(full_pdata_path, pdata_file))]
    }

    full_pdata_file <- file.path(full_pdata_path, pdata_file)
    if (length(full_pdata_file) == 0) {
        stop("No pdata files found to be loaded for sample ", sample_path)
    }
    if (!all(file.exists(full_pdata_file))) {
        stop(
            "File does not exist: ",
            paste(full_pdata_file[!file.exists(full_pdata_file)], collapse = ", ")
        )
    }

    # Read parameters
    procs_list <- read_procs_file(full_pdata_path)

    output <- list()
    output <- c(output, procs_list)

    if (!("procs" %in% names(output))) {
        stop("Missing procs file")
    }

    # Determine the byte order, then open and read the pdata files
    if (output$procs$BYTORDP == 0) {
        endian <- "little"
    } else {
        endian <- "big"
    }

    if (isTRUE(read_pdata_title)) {
        title <-
            read_pdata_title_file(sample_path = sample_path, pdata_path = pdata_path)
        output$title <- title
    }

    for (filename in pdata_file) {
        field_name <- paste0("data_", filename)
        full_filename <- file.path(full_pdata_path, filename)
        output[[field_name]] <-
            read_bin_data(full_filename, endian = endian)
        output[[field_name]] <-
            output[[field_name]] / (2^-output$procs$NC_proc)
    }

    output$levels <- read_levels(full_pdata_path,
        endian = endian,
        NC_proc = output$procs$NC_proc
    )

    data_shapes <- guess_shape_and_submatrix_shape(output)

    dimension <- data_shapes$dimension
    if (is.null(dimension)) {
        warning("Can't determine dimension. Are procs files loaded? Assuming dimension = 1")
        dimension <- 1
    }

    output$axis <- vector("list", length = dimension)

    if (dimension >= 1) {
        # Calculate x-axis
        output$axis[[1]] <- seq(
            from = output$procs$OFFSET,
            to = output$procs$OFFSET - output$procs$SW_p /
                output$procs$SF,
            length.out = output$procs$SI
        )
    }
    if (dimension >= 2) {
        # Additional axis and reordering if 2D-file
        output$axis[[2]] <- seq(
            from = output$proc2s$OFFSET,
            to = output$proc2s$OFFSET - output$proc2s$SW_p /
                output$proc2s$SF,
            length.out = output$proc2s$SI
        )
    }
    if (dimension >= 3) {
        output$axis[[3]] <- seq(
            from = output$proc3s$OFFSET,
            to = output$proc3s$OFFSET - output$proc3s$SW_p /
                output$proc3s$SF,
            length.out = output$proc3s$SI
        )
    }
    if (dimension >= 4) {
        output$axis[[4]] <- seq(
            from = output$proc4s$OFFSET,
            to = output$proc4s$OFFSET - output$proc4s$SW_p /
                output$proc4s$SF,
            length.out = output$proc4s$SI
        )
    }

    # Reorder submatrices (see XWinNMR manual, chapter 17.5 (95.3))
    if (dimension == 2) {
        SI1 <- data_shapes$shape[1]
        # SI2 <- data_shapes$shape[2]
        # XDIM1 <- data_shapes$submatrix_shape[1]
        XDIM2 <- data_shapes$submatrix_shape[2]
        NoSM_along_dimensions <-
            data_shapes$shape / data_shapes$submatrix_shape
        NoSM2 <- NoSM_along_dimensions[length(NoSM_along_dimensions)] # No of SM along F1
        NoSM <-
            cumprod(NoSM_along_dimensions) # cumulative number of submatrices along dimensions
        num_submatrices <- NoSM[length(NoSM)]
        for (filename in pdata_file) {
            field_name <- paste0("data_", filename)
            dim(output[[field_name]]) <-
                c(data_shapes$submatrix_shape, num_submatrices)
            output[[field_name]] <-
                aperm(output[[field_name]], c(2, 1, 3))
            dim(output[[field_name]]) <- c(XDIM2, SI1, NoSM2)
            output[[field_name]] <-
                aperm(output[[field_name]], c(2, 1, 3))
            dim(output[[field_name]]) <- data_shapes$shape
        }
    }
    if (dimension > 2) {
        stop("Can't deal with dimensions larger than two")
    }

    return(output)
}
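
# The axes built in read_bruker_pdata() run from OFFSET down to
# OFFSET - SW_p / SF over SI points, i.e. a chemical shift (ppm) axis.
# A standalone sketch with illustrative 1H parameter values (not taken from
# any real procs file), wrapped in `if (FALSE)` so it never runs on load:
if (FALSE) {
    OFFSET <- 14.8 # ppm value of the first point
    SW_p <- 12019.23 # spectral width (Hz)
    SF <- 600.13 # spectrometer frequency (MHz)
    SI <- 65536 # number of points
    ppm_axis <- seq(from = OFFSET, to = OFFSET - SW_p / SF, length.out = SI)
    range(ppm_axis) # roughly c(-5.2, 14.8)
}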

#' Infer dimension, pulse sequence and nuclei given the acqus file information
#'
#' This function determines the dimensionality of the sample, infers the pulse
#' sequence (based on the experiment name, which can be read from the EXP field
#' of the acqus file) and sets the sample nuclei field.
#'
#' The dimension is based on the number of existing acqus, acqu2s, acqu3s, ... files.
#'
#' The pulse sequence assignment is based on parsing the experiment name:
#' experiment name contains -> pulse sequence assigned
#' \itemize{
#' \item{NOESY -> NOESY}
#' \item{NOEZY -> NOESY (alias)}
#' \item{CPMG -> CPMG}
#' \item{DIFF -> DIFFUSION}
#' \item{JRES -> JRES}
#' \item{COSY -> COSY}
#' \item{TOCSY -> TOCSY}
#' \item{DIPSI -> TOCSY (alias)}
#' \item{MLEV -> TOCSY (alias)}
#' \item{HSQC -> HSQC}
#' \item{HMBC -> HMBC}
#' \item{PROTON -> PROTON}
#' }
#'
#' The nuclei in the sample are set according to the pulse sequence and the
#' acqus parameters, following Sofia's advice.
#'
#' @param acqus_list The list of information retrieved by \code{read_acqus_file}
#' @return A list with dimension, pulse_sequence and nuclei
#' @keywords internal
#' @noRd
#'
infer_dim_pulse_nuclei <- function(acqus_list) {
    output <- list(
        dimension = NA_integer_,
        pulse_sequence = NA_character_,
        nuclei = NA_character_
    )
    # The dimension is easy
    output$dimension <- length(acqus_list)

    # The pulse sequence is not that obvious
    experiment_name <- acqus_list$acqus$EXP
    # NUC1... NUC8 help to tell us the nuclei present
    NUCLEI <- paste0("NUC", seq_len(8))

    if (grepl(
        pattern = "NOESY",
        x = experiment_name,
        ignore.case = TRUE
    ) ||
        grepl(
            pattern = "NOEZY",
            x = experiment_name,
            ignore.case = TRUE
        )) {
        output$pulse_sequence <- "NOESY"
        output$nuclei <- acqus_list$acqus[["NUC1"]]
    } else if (grepl(
        pattern = "CPMG",
        x = experiment_name,
        ignore.case = TRUE
    )) {
        output$pulse_sequence <- "CPMG"
        output$nuclei <- acqus_list$acqus[["NUC1"]]
    } else if (grepl(
        pattern = "DIFF",
        x = experiment_name,
        ignore.case = TRUE
    )) {
        output$pulse_sequence <- "DIFFUSION"
        output$nuclei <- acqus_list$acqus[["NUC1"]]
    } else if (grepl(
        pattern = "JRES",
        x = experiment_name,
        ignore.case = TRUE
    )) {
        output$pulse_sequence <- "JRES"
        output$nuclei <- acqus_list$acqus[["NUC1"]]
    } else if (grepl(pattern = "PROTON", x = experiment_name, ignore.case = TRUE)) {
        output$pulse_sequence <- "PROTON"
        output$nuclei <- acqus_list$acqus[["NUC1"]]
    } else if (grepl(
        pattern = "COSY",
        x = experiment_name,
        ignore.case = TRUE
    )) {
        output$pulse_sequence <- "COSY"
        output$nuclei <-
            paste0(acqus_list$acqus[["NUC1"]], "-", acqus_list$acqu2s[["NUC1"]])
    } else if (grepl(
        pattern = "TOCSY",
        x = experiment_name,
        ignore.case = TRUE
    ) ||
        grepl(
            pattern = "MLEV",
            x = experiment_name,
            ignore.case = TRUE
        ) ||
        grepl(
            pattern = "DIPSI",
            x = experiment_name,
            ignore.case = TRUE
        )) {
        output$pulse_sequence <- "TOCSY"
        output$nuclei <-
            paste0(acqus_list$acqus[["NUC1"]], "-", acqus_list$acqu2s[["NUC1"]])
    } else if (grepl(
        pattern = "HSQC",
        x = experiment_name,
        ignore.case = TRUE
    )) {
        output$pulse_sequence <- "HSQC"
        output$nuclei <-
            paste(acqus_list$acqus[NUCLEI][acqus_list$acqus[NUCLEI] != "off"], collapse = "-")
    } else if (grepl(
        pattern = "HMBC",
        x = experiment_name,
        ignore.case = TRUE
    )) {
        output$pulse_sequence <- "HMBC"
        output$nuclei <-
            paste(acqus_list$acqus[NUCLEI][acqus_list$acqus[NUCLEI] != "off"], collapse = "-")
    } # else {
    #    warning(
    #        "infer_dim_pulse_nuclei: Unknown Pulse Sequence in acqus$EXP field.\n",
    #        "Please add the pulse sequence '",
    #        experiment_name,
    #        "' to infer_dim_pulse_nuclei in R/bruker.R\n"
    #    )
    # }

    return(output)
}
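
# Usage sketch with a hand-made acqus_list mimicking what read_acqus_file()
# returns for a 1D CPMG experiment. The EXP and NUC1 values are illustrative;
# wrapped in `if (FALSE)` so it never runs on load:
if (FALSE) {
    fake_acqus_list <- list(
        acqus = list(EXP = "CPMG_PROFILE_PLASMA", NUC1 = "1H")
    )
    infer_dim_pulse_nuclei(fake_acqus_list)
    # dimension: 1, pulse_sequence: "CPMG", nuclei: "1H"
}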

#' Read Bruker NMR metadata
#'
#' @param sample_path A character path of the sample directory
#' @param pdata_path Optional character path of the processed data (to read pdata title file)
#' @param read_pdata_title logical. If `TRUE` reads the `pdata/1/title`
#'                         file if it exists.
#' @return A list with the read metadata
#' @noRd
read_bruker_metadata <-
    function(sample_path,
    pdata_path = "pdata/1",
    read_pdata_title = TRUE) {
        # Read parameters
        acqus_list <- read_acqus_file(sample_path)

        if (length(acqus_list) == 0) {
            stop("No acqus file found in ", sample_path)
        }
        info <- list()
        info$NMRExperiment <- basename(sample_path)
        info$file_format <- "Bruker NMR directory"
        info$sample_path <- sample_path
        info <- c(info, infer_dim_pulse_nuclei(acqus_list))

        orig <- read_orig_file(sample_path = sample_path)
        # Store acquisition time information
        info$acq_datetime <- as.POSIXct(acqus_list$acqus$Stamp[1])



        output <- list(info = info, orig = orig)

        if (isTRUE(read_pdata_title)) {
            pdata_1_title <- read_pdata_title_file(
                sample_path = sample_path,
                pdata_path = pdata_path
            )
            output$title <- pdata_1_title
        }

        output <- c(output, acqus_list)

        if (!("acqus" %in% names(output))) {
            stop("Missing acqus file")
        }


        # if (!any(c("fid", "ser") %in% list.files(sample_path))) {
        #     stop("No raw data available in ", sample_path)
        # }
        return(output)
    }
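
# Usage sketch (hypothetical `sample_dir` path, wrapped in `if (FALSE)` so it
# never runs on load). The returned list contains an `info` element with the
# inferred fields plus the parsed acqus parameter lists:
if (FALSE) {
    sample_dir <- "path/to/bruker/sample/10"
    meta <- read_bruker_metadata(sample_dir)
    meta$info$pulse_sequence # e.g. "NOESY" or "CPMG"
    meta$info$acq_datetime # parsed from the acqus $$ stamp
    names(meta$acqus) # all acqus parameters
}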

#' Read a Bruker sample directory
#'
#' @param pdata_file File name of the binary NMR data to load. Usually "1r".
#'                   If `NULL`, it is autodetected based on the dimension.
#' @param sample_path A character path of the sample directory
#' @param pdata_path Path from `sample_path` to the preprocessed data
#' @param all_components If `FALSE` load only the real component. Otherwise load all of them
#' @return a list with all the bruker sample information
#' @noRd
read_bruker_sample <- function(sample_path,
    pdata_file = NULL,
    pdata_path = "pdata/1",
    all_components = FALSE) {
    meta <- read_bruker_metadata(sample_path,
        pdata_path,
        read_pdata_title = TRUE
    )
    pdata <- read_bruker_pdata(
        sample_path = sample_path,
        pdata_file = pdata_file,
        pdata_path = pdata_path,
        all_components = all_components,
        read_pdata_title = FALSE
    ) # pdata title already read

    output <- bruker_merge_meta_pdata(meta, pdata)
    return(output)
}

bruker_merge_meta_pdata <- function(meta, pdata) {
    # This function concatenates the metadata and the processed data.
    # It is trivial, but keeping it as a separate function means that if the
    # merging ever becomes more complex, any code that calls it will keep
    # working unchanged.
    return(c(meta, pdata))
}

#' Create one zip file for each Bruker sample path
#'
#' @param path Character vector with sample directories
#' @param workdir Directory to store zip files
#' @param overwrite Should existing zip files be overwritten?
#' @param ... Passed to [utils::zip]
#' @return A character vector of the same length as path, with the zip file names
#' @family import/export functions
#' @export
#' @examples
#' save_zip_files_to <- tempfile(pattern = "zip_file_storage_")
#' where_your_samples_are <- tempfile(pattern = "where_your_samples_are")
#' # prepare sample:
#' zip::unzip(
#'   system.file("dataset-demo", "10.zip", package = "AlpsNMR"),
#'   exdir = where_your_samples_are
#' )
#' 
#' outpaths <- nmr_zip_bruker_samples(
#'     list.files(where_your_samples_are, full.names = TRUE),
#'     workdir = save_zip_files_to
#' )
nmr_zip_bruker_samples <-
    function(path, workdir, overwrite = FALSE, ...) {
        current_wd <- getwd()
        dir.create(workdir, recursive = TRUE, showWarnings = FALSE)
        workdir <- normalizePath(workdir)
        outpaths <- rep(NA_character_, length(path))
        # tryCatch to make sure the working directory is restored
        # The zip utility in R is very basic. On Windows it relies on the zip.exe
        # program being installed and on the PATH; zip.exe is included in Rtools.
        tryCatch(
            {
                for (i in seq_len(length(path))) {
                    path_i <- path[i]
                    # The directory where the NMR sample is: (e.g. "/nihs/Instrument/.../DUND-plasma/10")
                    dir_to_compress <- normalizePath(path_i)
                    # dir_name: (e.g. "10")
                    dir_name <- basename(dir_to_compress)
                    # parent_dir: "/nihs/Instrument/.../DUND-plasma"
                    parent_dir <-
                        normalizePath(dirname(dir_to_compress))
                    # destination_file: /tmp/my_temp_dir/10.zip
                    destination_file <-
                        file.path(workdir, paste0(dir_name, ".zip"))
                    outpaths[i] <- destination_file
                    if (file.exists(destination_file)) {
                        if (overwrite) {
                            # deletes destination zip file if overwrite is true
                            unlink(destination_file)
                        } else {
                            # Skips otherwise
                            warning("File ", destination_file, " already exists. Skipping")
                            next
                        }
                    }
                    # Change to the parent_dir so the zip file does not include the whole
                    # directory tree folders
                    setwd(parent_dir)
                    # Zip it!
                    utils::zip(zipfile = destination_file, files = dir_name, ...)
                    # Go back to the initial directory, so the next file can be
                    # found if relative paths are used
                    setwd(current_wd)
                }
            },
            finally = {
                # Always restore the working directory, even if there are errors somewhere
                setwd(current_wd)
            }
        )
        return(outpaths)
    }

#' Read Free Induction Decay file
#'
#' Reads an FID file. This is a very simple function.
#'
#' @param sample_name A single sample path (the directory containing the fid file)
#' @param endian Endianness of the fid file ("little" by default, use "big" if acqus$BYTORDA == 1)
#' @return A numeric vector with the free induction decay values
#' @export
#' @family import/export functions
#' @examples
#' fid <- nmr_read_bruker_fid("sample.fid")
nmr_read_bruker_fid <- function(sample_name, endian = "little") {
    if (file.exists(file.path(sample_name, "fid"))) {
        fid_file <- file.path(sample_name, "fid")

        num_numbers <- file.size(fid_file) / 8
        fid <-
            readBin(
                fid_file,
                what = "integer",
                n = num_numbers,
                size = 4,
                signed = TRUE,
                endian = endian
            )
    } else {
        fid <- NULL
    }
    fid
}
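
# A hedged usage sketch (with a hypothetical `sample_dir` path): the byte order
# of the fid file is stored in the acqus BYTORDA parameter, so it can be read
# first and used to choose the endianness:
if (FALSE) {
    sample_dir <- "path/to/bruker/sample/10"
    acqus <- read_bruker_param(file.path(sample_dir, "acqus"))
    endian <- if (acqus$BYTORDA == 0) "little" else "big"
    fid <- nmr_read_bruker_fid(sample_dir, endian = endian)
}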