R/import_thickness_csv.R

Defines functions import_thickness_csv

Documented in import_thickness_csv

#' Import the CSV produced by "Thickness Analysis" in OCT Explorer
#'
#' Imports and reshapes the CSV data produced by the "Thickness Analysis"
#' performed by OCT Explorer software. The wide table of `MeanThickness_*` and
#' `SDThickness_*` columns is converted to long form, with one row per input
#' row and thickness measurement, and the mean and standard deviation placed
#' side by side.
#'
#' OCT Explorer is currently distributed for Windows, and the "Surfaces"
#' column contains the full path to each XML segmentation file, producing
#' unwieldy identifiers. Therefore, handy default values for the parameters
#' `split`, `n`, and `pattern` are provided for cleaning up the values in the
#' Surfaces column and storing the result in `sample_id`.
#'
#' @param csv_file the CSV file to import
#' @param split a regular expression used as the split pattern for the values
#'   of the Surfaces column (passed on to `strsplit_nth()` with
#'   `perl = TRUE`). The default matches a single backslash.
#' @param n either "last" or an integer position (passed on to
#'   `strsplit_nth()`)
#' @param pattern regular expression to remove from the split values of the
#'   Surfaces column when building `sample_id`
#'
#' @return A data frame in long format. The OCT Explorer metadata columns are
#'   renamed to snake_case, `octexplorer_span` and `region` are parsed from
#'   the thickness column names, `mean` and `sd` hold the values of the
#'   corresponding `MeanThickness_*` and `SDThickness_*` columns, and
#'   `sample_id` is the cleaned-up Surfaces value.
#'
#' @export
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate filter select rowwise rename ungroup matches inner_join
#' @importFrom tidyr gather separate spread pivot_longer
#' @importFrom stringr str_replace_all
#' @importFrom readr read_csv
#' @importFrom rlang .data set_names
import_thickness_csv <- function(csv_file,
                                 split="\\\\",
                                 n="last",
                                 pattern="_Surfaces_Iowa.xml") {

    raw <- read_csv(csv_file)

    result <-
        raw %>%
        # The last column of the export is discarded.
        # NOTE(review): presumably an empty column caused by a trailing
        # delimiter in the OCT Explorer output -- confirm.
        select(-ncol(raw)) %>%
        pivot_longer(
            cols = matches("Thickness"),
            names_to = "key",
            values_to = "um"
            ) %>%
        mutate(
            # Columns whose name contains "Mean" hold means; every other
            # thickness column is treated as a standard deviation.
            key_stat = ifelse(grepl(pattern = "Mean", x = .data$key),
                              "mean", "sd"),
            # Strip the statistic prefix so that the mean and SD rows of the
            # same measurement share an identical key.
            key = gsub(pattern = "^MeanThickness_|^SDThickness_",
                       replacement = "",
                       x = .data$key,
                       perl = TRUE)
            )

    # Remove the "%" symbol from any of the column names!
    result <- set_names(result,
                        gsub(pattern = "\\%", replacement = "percent",
                             x = names(result), perl = TRUE))

    # Get the means. Columns are named by string because `.data$` is
    # deprecated in tidyselect contexts. spread() is retained so that the
    # row ordering of the output stays as before.
    result_mean <-
        result %>%
        filter(.data$key_stat == "mean") %>%
        spread(key = "key_stat", value = "um")

    # Get the standard deviations
    result_sd <-
        result %>%
        filter(.data$key_stat == "sd") %>%
        spread(key = "key_stat", value = "um")

    # Join on every shared column (metadata plus key). This is what the
    # implicit natural join did; naming the columns silences the
    # "Joining with `by = ...`" message.
    join_cols <- intersect(names(result_mean), names(result_sd))

    # 1. Combine the means and standard deviations.
    # 2. Rename the columns
    # 3. Add a sample ID by cleaning up the surfaces values
    result <-
        result_mean %>%
        inner_join(result_sd, by = join_cols) %>%
        rename(
            surfaces = "Surfaces",
            laterality = "Laterality",
            oct_center_type = "OCTCenterType",
            oct_size_x_voxel = "OCTSizeX_voxel",
            oct_size_y_voxel = "OCTSizeY_voxel",
            oct_size_z_voxel = "OCTSizeZ_voxel",
            physical_size_x_mm = "PhysicalSizeX_mm",
            physical_size_y_mm = "PhysicalSizeY_mm",
            physical_size_z_mm = "PhysicalSizeZ_mm",
            voxel_size_x_um = "VoxelSizeX_um",
            voxel_size_y_um = "VoxelSizeY_um",
            voxel_size_z_um = "VoxelSizeZ_um",
            grid = "Grid",
            grid_center = "GridCenter",
            grid_center_x_pixel = "GridCenterX_pixel",
            grid_center_y_pixel = "GridCenterY_pixel",
            undefined_region_percent = "UndefinedRegion_percent"
            ) %>%
        mutate(sample_id = as.character(.data$surfaces)) %>%
        mutate(sample_id = strsplit_nth(.data$sample_id,
                                       split = split,
                                       n = n,
                                       perl = TRUE)
               ) %>%
        mutate(sample_id = gsub(pattern = pattern, replacement = "",
                                x = .data$sample_id))

    # Split the remaining key ("<span>_Region<region>", once the "_um"
    # suffix is removed) into its span and region parts.
    result <-
        result %>%
        mutate(key = gsub(pattern = "_um$", replacement = "",
                          x = .data$key, perl = TRUE)) %>%
        separate(col = "key", into = c("span", "region"),
                 sep = "_Region", remove = TRUE) %>%
        rename(octexplorer_span = "span") %>%
        mutate(region = as.character(.data$region))

    return(result)
}
barefootbiology/heyexr documentation built on July 9, 2022, 3:35 a.m.