#' @title Loading data file
#' @description Loading all data
#' @param contig_lengths_file_path Path to contig lengths file
#' @param input_file_path Path to file generated by postpsass
#' @param common Whether to load common category data (default: TURE)
#' @param unique_1 Whether to load unique_2 category data (default: TURE)
#' @param unique_2 Whether to load unique_2 category data (default: TURE)
#' @param divergence_minor_allele Whether to load divergence_minor_allele category data (default: TURE)
#' @param divergence_major_allele Whether to load divergence_major_allele category data (default: TURE)
#' @param chromosomes_names_file_path Path to chromosomes names file
#' @param plot.unplaced Whether to plot unplaced scaffolds
#' @return Output all data in to a list
#' @export
load_data_files <- function(contig_lengths_file_path, input_file_path,
common = TRUE, unique_1 = TRUE,
unique_2 = TRUE, divergence_minor_allele = TRUE,
divergence_major_allele = TRUE,
chromosomes_names_file_path = NULL,
plot.unplaced = TRUE) {
output <- list()
print(" - Loading chromosomes names file")
output$names <- load_chromosomes_names(chromosomes_names_file_path)
print(" - Loading contig lengths file")
output$lengths <- load_contig_lengths(contig_lengths_file_path, chromosomes_names = output$names)
all_data <- load_single_data_file(input_file_path, output$lengths, plot.unplaced = plot.unplaced)
output$original_data <- all_data
if (common) {
print(" - Loading sliding window common SNPs data")
output$common <- all_data[,c(-4, -5, -6, -7)]
# output$common$Color = 1
}
if (unique_1) {
print(" - Loading sliding window unique_1 data")
output$unique_1 <- all_data[,c(-3, -5, -6, -7)]
}
if (unique_2) {
print(" - Loading sliding window unique_2 data")
output$unique_2 <- all_data[,c(-3, -4, -6, -7)]
# output$unique_2$Color = 3
}
if (divergence_minor_allele) {
print(" - Loading sliding window divergence_minor_allele data")
output$divergence_minor_allele <- all_data[,c(-3, -4, -5, -7)]
# output$divergence_minor_allele$Color = 4
}
if (divergence_major_allele) {
print(" - Loading sliding window divergence_major_allele data")
output$divergence_major_allele <- all_data[,c(-3, -4, -5, -6)]
# output$divergence_major_allele$Color = 5
}
return(output)
}
#' @title Loading a single data file generated by postpsass
#' @description Loading a single file generated by postpsass
#' @param input_file_path file generated by postpsass
#' @param chromosomes_names Generated by load_chromosomes_names_file
#' @param plot.unplaced Whether to plot unplaced scaffolds
#' @return Output data frame
load_contig_lengths <- function(input_file_path, chromosomes_names = NULL, plot.unplaced = TRUE) {
raw_data <- suppressMessages(read.delim(input_file_path, header=FALSE))
data <- raw_data$V2
names(data) <- raw_data$V1
output <- list()
if (!is.null(chromosomes_names)) { # If a chromosomes names file was provided, it is used to determine chromosomes
output$lg <- subset(data, names(data) %in% names(chromosomes_names) & chromosomes_names[names(data)] != "MT")
output$lg <- output$lg[gtools::mixedorder(chromosomes_names[names(output$lg)])]
output$unplaced <- subset(data, !(names(data) %in% names(chromosomes_names)))
} else { # Otherwise, try to determine chromosomes automatically (condition: start with LG / Ch / NC)
output$lg <- subset(data, substr(names(data), 1, 2) %in% c("LG", "lg", "Lg", "Ch", "ch", "CH", "NC"))
if (is.vector(output$lg) && length(output$lg) > 1) {
# Usually mitochondria is also called NC_xxx. If one chromosome is > 50 times smaller than the average of all ohter chromosomes,
# or it is smaller than 50000 bp, it is considered to be the mitochondria and is removed
putative_mt <- min(output$lg)
if (50 * putative_mt < median(output$lg) | putative_mt < 50000) output$lg <- output$lg[output$lg != putative_mt]
output$lg <- output$lg[gtools::mixedorder(names(output$lg))] # Order chromosomes based on their ID
} else if (length(output$lg) == 1) {
output$lg <- output$lg[-1] # If there is only one LG, it is assumed it is a mitochondria
}
output$unplaced <- subset(data, !(substr(names(data), 1, 2) %in% c("LG", "lg", "Ch", "ch", "MT", "mt", "NC")))
}
output$unplaced <- sort(output$unplaced, decreasing = TRUE) # Sort unplaced scaffolds by size
if (plot.unplaced) {
output$plot <- c(output$lg, "Unplaced" = sum(output$unplaced))
} else {
output$plot <- output$lg
}
return(output)
}
#' @title Loading a single data file generated by postpsass
#' @description Loading a single file generated by postpsass
#' @param input_file_path file generated by postpsass
#' @param contig_lengths Generated by load_contig_lengths function
#' @param plot.unplaced Whether to plot unplaced scaffolds
#' @return Output data frame
load_single_data_file <- function(input_file_path, contig_lengths, plot.unplaced = TRUE, snp_pos = FALSE) {
data <- suppressMessages(read.delim(input_file_path))
data_lg <- subset(data, data$Contig %in% names(contig_lengths$lg))
data_unplaced <- subset(data, data$Contig %in% names(contig_lengths$unplaced))
# If lgs were found, sort them and set their color index to 2 (for the plotting later)
if (dim(data_lg)[1] > 0) {
data_lg$Color <- rep(2, dim(data_lg)[1])
}
if (plot.unplaced & dim(data_unplaced)[1] > 0) { # If unplaced scaffolds should be grouped and there is at least one unplaced scaffold
# Order unplaced contigs data by contig length and then by position on the contig
data_unplaced <- data_unplaced[order(match(data_unplaced$Contig, names(contig_lengths$unplaced)), data_unplaced$Position), ]
# Attribute a color index to each unplaced contig, alternating between 0 and 1
order <- seq(1, length(unique(data_unplaced$Contig)))
names(order) <- unique(data_unplaced$Contig)
data_unplaced$Color <- order[data_unplaced$Contig] %% 2
# Transform position on each contig into position on cumulated contig
temp <- cumsum(contig_lengths$unplaced) - contig_lengths$unplaced[1]
data_unplaced$Original_position <- data_unplaced$Position
data_unplaced$Position <- data_unplaced$Position + temp[data_unplaced$Contig]
data_unplaced$Contig_id <- data_unplaced$Contig
data_unplaced$Contig <- "Unplaced"
# Regroup data into one data frame
data_lg$Contig_id <- data_lg$Contig
data_lg$Original_position <- data_lg$Position
data <- rbind(data_lg, data_unplaced)
data$Contig <- factor(data$Contig, levels = c(names(contig_lengths$lg), "Unplaced"))
} else {
data <- data_lg
data$Original_position <- data$Position
data$Contig <- factor(data$Contig, levels = names(contig_lengths$lg))
}
return(data)
}
#' @title Loading chromosomes name file
#' @description Two column should provide in this file: original name and corresponding names for sector name
#' @param input_file_path Path to chromosomes name file
#' @return Data.frame of chromosomes name file
load_chromosomes_names <- function(input_file_path) {
if (!is.null(input_file_path)) {
raw_data <- suppressMessages(read.delim(input_file_path, header=FALSE))
data <- as.character(raw_data$V2)
names(data) <- raw_data$V1
data <- gtools::mixedsort(data)
return(data)
} else {
return(NULL)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.