# R/load_single_data_file.R
#
# Defines functions: load_single_data_file
#
# Documented in: load_single_data_file

#' @title Load a single data file
#'
#' @description Loads a single tab-separated data file generated by PSASS, keeps the rows
#' whose contig is listed in \code{contig_lengths}, and adds the columns used later for
#' plotting. When unplaced contigs are grouped, they are concatenated (in the order given by
#' \code{contig_lengths$unplaced}) into a single "Unplaced" super-contig and their positions
#' are shifted accordingly.
#'
#' @param input_file_path Path to a data file. The file must contain at least the columns
#' \code{Contig} and \code{Position}.
#'
#' @param contig_lengths A list of contig lengths obtained with the \code{\link{load_contig_lengths}} function.
#' Its \code{lg} and \code{unplaced} elements are used as vectors named by contig identifier;
#' \code{unplaced} must hold numeric lengths.
#'
#' @param plot.unplaced If TRUE, unplaced contigs will be plotted as a supercontig (default TRUE).
#'
#' @param snp_pos If TRUE, the file is read with a fixed set of explicit column types instead of
#' letting the reader guess them (default FALSE).
#'
#' @return A data frame storing the data from the input file, restricted to the contigs found in
#' \code{contig_lengths}. \code{Contig} is returned as a factor and \code{Original_position}
#' holds the position as read from the file. A \code{Color} column (2 for linkage groups,
#' alternating 0/1 for unplaced contigs) is added when matching rows exist, and
#' \code{Contig_id} (the original contig name) is added when unplaced contigs are grouped.
#'
#' @examples
#' data <- load_single_data_file("fst_window.tsv", contig_lengths, plot.unplaced = FALSE)
load_single_data_file <- function(input_file_path, contig_lengths, plot.unplaced = TRUE, snp_pos = FALSE) {

    if (snp_pos) {

        # Explicit column types are forced for this file layout
        data <- suppressMessages(readr::read_delim(input_file_path, delim = "\t", escape_double = FALSE, trim_ws = TRUE, col_types = 'cifdddddddddddd'))

    } else {

        data <- suppressMessages(readr::read_delim(input_file_path, delim = "\t", escape_double = FALSE, trim_ws = TRUE))

    }

    # Fail early with a clear message instead of silently returning an empty result
    missing_columns <- setdiff(c("Contig", "Position"), names(data))
    if (length(missing_columns) > 0) {

        stop("Missing required column(s) in '", input_file_path, "': ",
             paste(missing_columns, collapse = ", "), call. = FALSE)

    }

    lg_names <- names(contig_lengths$lg)
    unplaced_lengths <- contig_lengths$unplaced
    unplaced_names <- names(unplaced_lengths)

    data_lg <- data[data$Contig %in% lg_names, ]
    data_unplaced <- data[data$Contig %in% unplaced_names, ]

    # If lgs were found, set their color index to 2 (for the plotting later)
    if (nrow(data_lg) > 0) {

        data_lg$Color <- rep(2, nrow(data_lg))

    }

    # Scalar condition: `&&` short-circuits and is the correct operator inside `if`
    if (plot.unplaced && nrow(data_unplaced) > 0) {

        # Order unplaced contigs data by their rank in contig_lengths$unplaced, then by position on the contig
        contig_rank <- match(data_unplaced$Contig, unplaced_names)
        data_unplaced <- data_unplaced[order(contig_rank, data_unplaced$Position), ]

        # Attribute a color index to each unplaced contig, alternating between 1 and 0
        # in order of first appearance
        contig_ids <- as.character(data_unplaced$Contig)
        data_unplaced$Color <- match(contig_ids, unique(contig_ids)) %% 2

        # Transform position on each contig into position on the cumulated contig.
        # The offset of a contig is the total length of the contigs placed before it,
        # i.e. the cumulative sum minus the contig's own length.
        offsets <- cumsum(unplaced_lengths) - unplaced_lengths
        data_unplaced$Original_position <- data_unplaced$Position
        data_unplaced$Position <- data_unplaced$Position + unname(offsets[contig_ids])
        data_unplaced$Contig_id <- data_unplaced$Contig
        data_unplaced$Contig <- "Unplaced"

        # Regroup data into one data frame (rbind matches data frame columns by name)
        data_lg$Contig_id <- data_lg$Contig
        data_lg$Original_position <- data_lg$Position
        data <- rbind(data_lg, data_unplaced)
        data$Contig <- factor(data$Contig, levels = c(lg_names, "Unplaced"))

    } else {

        # Unplaced contigs are ignored: only linkage group data is returned
        data <- data_lg
        data$Original_position <- data$Position
        data$Contig <- factor(data$Contig, levels = lg_names)

    }

    return(data)
}
# INRA-LPGP/PoolSex-vis documentation built on March 7, 2020, 6:03 p.m.