Nothing
#' @name gl.ld.haplotype
#' @title Visualize patterns of linkage disequilibrium and identification of
#' haplotypes
#' @description
#' This function plots a Linkage disequilibrium (LD) heatmap, where the colour
#' shading indicates the strength of LD. Chromosome positions (Mbp) are shown on
#' the horizontal axis, and haplotypes appear as triangles and delimited by
#' dark yellow vertical lines. Numbers identifying each haplotype are shown in
#' the upper part of the plot.
#'
#' The heatmap also shows heterozygosity for each SNP.
#'
#' The function identifies haplotypes based on contiguous SNPs that are in
#' linkage disequilibrium using as threshold \code{ld_threshold_haplo} and
#' containing more than \code{min_snps} SNPs.
#'
#' @param x Name of the genlight object containing the SNP data [required].
#' @param pop_name Name of the population to analyse. If NULL all the
#' populations are analised [default NULL].
#' @param chrom_name Nme of the chromosome to analyse. If NULL all the
#' chromosomes are analised [default NULL].
#' @param ld_max_pairwise Maximum distance in number of base pairs at which LD
#' should be calculated [default 10000000].
#' @param maf Minor allele frequency (by population) threshold to filter out
#' loci. If a value > 1 is provided it will be interpreted as MAC (i.e. the
#' minimum number of times an allele needs to be observed) [default 0.05].
#' @param ld_stat The LD measure to be calculated: "LLR", "OR", "Q", "Covar",
#' "D.prime", "R.squared", and "R". See \code{\link[snpStats]{ld}}
#' (package snpStats) for details [default "R.squared"].
#' @param ind.limit Minimum number of individuals that a population should
#' contain to take it in account to report loci in LD [default 10].
#' @param haplo_id Whether to identify haplotypes [default FALSE].
#' @param min_snps Minimum number of SNPs that should have a haplotype to call
#' it [default 10].
#' @param ld_threshold_haplo Minimum LD between adjacent SNPs to call a
#' haplotype [default 0.5].
#' @param target_snp Position of target SNP [default NULL].
#' @param coordinates A vector of two elements with the start and end
#' coordinates in base pairs to which restrict the
#' analysis e.g. c(1,1000000) [default NULL].
#' @param color_haplo Color palette for haplotype plot. See details
#' [default "viridis"].
#' @param color_het Color for heterozygosity [default "deeppink"].
#' @param plot.out Specify if heatmap plot is to be produced [default TRUE].
#' @param plot.dir Directory in which to save files [default = working directory]
#' @param plot.save Whethwr to save the plot in pdf format [default FALSE].
#' @param verbose Verbosity: 0, silent or fatal errors; 1, begin and end; 2,
#' progress log; 3, progress and results summary; 5, full report
#' [default 2, unless specified using gl.set.verbosity].
#'
#' @details
#' The information for SNP's position should be stored in the genlight accessor
#' "@@position" and the SNP's chromosome name in the accessor "@@chromosome"
#' (see examples). The function will then calculate LD within each chromosome.
#'
#' The output of the function includes a table with the haplotypes
#' that were identified and their location.
#'
#' Colors of the heatmap (\code{color_haplo}) are based on the function
#' \code{\link[viridis]{scale_fill_viridis}} from package \code{viridis}.
#' Other color palettes options are "magma", "inferno", "plasma", "viridis",
#' "cividis", "rocket", "mako" and "turbo".
#' @return A table with the haplotypes that were identified.
#' @family ld functions
#' @examples
#' require("dartR.data")
#' x <- platypus.gl
#' x <- gl.filter.callrate(x, threshold = 1)
#' # only the first 15 individuals because of speed during tests
#' x <- gl.keep.pop(x, pop.list = "TENTERFIELD")[1:15, ]
#' x$chromosome <- as.factor(x$other$loc.metrics$Chrom_Platypus_Chrom_NCBIv1)
#' x$position <- x$other$loc.metrics$ChromPos_Platypus_Chrom_NCBIv1
#' ld_res <- gl.ld.haplotype(x,
#' chrom_name = "NC_041728.1_chromosome_1",
#' ld_max_pairwise = 10000000
#' )
#'
#' @author Custodian: Luis Mijangos -- Post to
#' \url{https://groups.google.com/d/forum/dartr}
#' @export
gl.ld.haplotype <- function(x,
pop_name = NULL,
chrom_name = NULL,
ld_max_pairwise = 10000000,
maf = 0.05,
ld_stat = "R.squared",
ind.limit = 10,
haplo_id = FALSE,
min_snps = 10,
ld_threshold_haplo = 0.5,
target_snp = NULL,
coordinates = NULL,
color_haplo = "viridis",
color_het = "deeppink",
plot.out = TRUE,
plot.save = FALSE,
plot.dir = NULL,
verbose = NULL) {
# SET VERBOSITY
verbose <- gl.check.verbosity(verbose)
# SET WORKING DIRECTORY
plot.dir <- gl.check.wd(plot.dir, verbose = 0)
# FLAG SCRIPT START
funname <- match.call()[[1]]
utils.flag.start(
func = funname,
build = "Jody",
verbose = verbose
)
# CHECK DATATYPE
datatype <- utils.check.datatype(x, verbose = verbose)
# FUNCTION SPECIFIC ERROR CHECKING
# check if packages are installed
pkg <- "snpStats"
if (!(requireNamespace(pkg, quietly = TRUE))) {
cat(error(
"Package",
pkg,
" needed for this function to work. Please install it.\n"
))
return(-1)
}
pkg <- "fields"
if (!(requireNamespace(pkg, quietly = TRUE))) {
cat(error(
"Package",
pkg,
" needed for this function to work. Please install it.\n"
))
return(-1)
}
pkg <- "sp"
if (!(requireNamespace(pkg, quietly = TRUE))) {
cat(error(
"Package",
pkg,
" needed for this function to work. Please install it.\n"
))
return(-1)
}
# DO THE JOB
group <- het <- lat <- long <- NULL
if (is.null(chrom_name) == FALSE) {
chrom_tmp <- unlist(lapply(chrom_name, function(y) {
which(x$chromosome == y)
}))
chrom_tmp <- chrom_tmp[order(chrom_tmp)]
x <- gl.keep.loc(x, loc.list = locNames(x)[chrom_tmp], verbose = 0)
}
if (is.null(pop_name) == FALSE) {
x <- gl.keep.pop(x, pop.list = pop_name, verbose = 0)
}
x_list <- seppop(x)
if (is.null(coordinates) == FALSE) {
cat(report(
" Restricting the analysis from", coordinates[1], "to",
coordinates[2], "base pairs\n"
))
x_list <- lapply(x_list, function(y) {
loc_names <- which(y$position >= coordinates[1] &
y$position <= coordinates[2])
y <- gl.keep.loc(y, loc.list = locNames(y)[loc_names], verbose = 0)
return(y)
})
}
haplo_table <- as.data.frame(matrix(nrow = 1, ncol = 10))
colnames(haplo_table) <- c(
"population", "chromosome", "haplotype", "start", "end",
"start_ld_plot", "end_ld_plot", "midpoint",
"midpoint_ld_plot", "labels"
)
chr_list <- as.character(unique(x$chromosome))
# p <- NULL
p <- rep(list(as.list(rep(NA, length(chr_list)))), length(names(x_list)))
names(p) <- names(x_list)
p <- lapply(p, function(x) {
names(x) <- paste0("chr_", chr_list)
return(x)
})
for (pop_n in 1:length(x_list)) {
pop_ld <- x_list[[pop_n]]
pop_name <- popNames(pop_ld)
if (nInd(pop_ld) <= ind.limit) {
cat(warn(
paste(
" Skipping population",
pop_name,
"from analysis because it has less than",
ind.limit,
"individuals.\n"
)
))
next()
}
if (verbose >= 2) {
cat(report(" Calculating pairwise LD in population", pop_name, "\n"))
}
# ordering SNPs by chromosome and position
hold <- pop_ld
pop_ld <- hold[, order(hold$chromosome, hold$position)]
pop_ld$other$loc.metrics <-
hold$other$loc.metrics[order(hold$chromosome, hold$position), ]
pop_ld <- gl.recalc.metrics(pop_ld, verbose = 0)
if (maf > 0) {
pop_ld <- gl.filter.maf(pop_ld, threshold = maf, verbose = 0)
}
gl2plink(pop_ld,
outfile = paste0("gl_plink", "_", pop_name),
verbose = 0
)
# Read a pedfile as "SnpMatrix" object using a modified version of the
# function read.pedfile from package snpStats
snp_stats <-
utils.read.ped(
file = paste0(tempdir(), "/", "gl_plink", "_", pop_name, ".ped"),
snps = paste0(tempdir(), "/", "gl_plink", "_", pop_name, ".map"),
sep = " ",
show_warnings = FALSE,
na.strings = NA
)
ld_map <- snp_stats$map
colnames(ld_map) <-
c(
"chr",
"snp.name",
"null",
"loc_bp",
"allele.1",
"allele.2"
)
ld_map$chr <- pop_ld$chromosome
genotype <- snp_stats$genotypes
colnames(genotype@.Data) <- ld_map$loc_bp
for (chrom in 1:length(chr_list)) {
chr_name <- chr_list[chrom]
if (verbose >= 2) {
cat(report(" Analysing chromosome", chr_name, "\n"))
}
ld_loci <- which(ld_map$chr == chr_list[chrom])
ld_map_loci <- ld_map[ld_loci, ]
genotype_loci <- genotype[, ld_loci]
# removing loci that have the same location
dupl_loci <- which(duplicated(ld_map_loci$loc_bp))
if (length(dupl_loci) > 0) {
ld_map_loci <- ld_map_loci[-dupl_loci, ]
genotype_loci <- genotype_loci[, -dupl_loci]
}
if (nrow(ld_map_loci) <= 1) {
next
}
# this is the mean distance between each snp which is used to determine
# the depth at which LD analyses are performed
mean_dis <- mean(diff(ld_map_loci$loc_bp))
ld_depth_b <- ceiling((ld_max_pairwise / mean_dis)) - 1
if (ld_depth_b < 5) {
cat(warn(
" The maximum distance at which LD should be calculated
(ld_max_pairwise) is too short for chromosome", chr_name,
". Setting this distance to", round(mean_dis * 5, 0), "bp\n"
))
ld_depth_b <- 5
}
# function to calculate LD
ld_snps <- snpStats::ld(genotype_loci,
depth = ld_depth_b,
stats = ld_stat
)
ld_matrix_2 <- ld_snps
colnames(ld_matrix_2) <- rownames(ld_matrix_2)
rownames(ld_matrix_2) <- 1:nrow(ld_matrix_2)
colnames(ld_matrix_2) <- 1:ncol(ld_matrix_2)
ld_columns_2 <- as.data.frame(as.table(as.matrix(ld_matrix_2)))
# remove cases where LD was not calculated
ld_columns_2 <- ld_columns_2[-ld_columns_2$Freq < 0, ]
ld_columns_2$Var1 <- as.numeric(as.character(ld_columns_2$Var1))
ld_columns_2$Var2 <- as.numeric(as.character(ld_columns_2$Var2))
ld_columns_2 <- ld_columns_2[complete.cases(ld_columns_2), ]
raster_haplo <- raster::rasterFromXYZ(ld_columns_2)
polygon_haplo <-
raster::rasterToPolygons(
raster_haplo,
fun = NULL,
n = 4,
na.rm = TRUE,
digits = 12,
dissolve = TRUE
)
# polygon_haplo <- elide.polygonsdf_2(polygon_haplo,rotate = 45)
polygon_haplo <- sp::elide(polygon_haplo, rotate = 45)
polygon_haplo$id <- rownames(as.data.frame(polygon_haplo))
# this only has the coordinates
polygon_haplo.pts <- fortify(polygon_haplo, polygon_haplo = "id")
# add the attributes back
polygon_haplo.df <-
merge(polygon_haplo.pts,
polygon_haplo,
by = "id",
type = "left"
)
width_poly <- round(max(polygon_haplo.df$long), 0)
height_poly <- max(polygon_haplo.df$lat)
reduce_factor <- nrow(ld_map_loci) / width_poly
enlarge_factor <- width_poly / nrow(ld_map_loci)
# this is to correct the cell size unit when the polygon figure is more or
# less than 1000 units
correction_factor <- (width_poly / 1000)
ld_matrix_3 <- ld_snps
ld_matrix_3 <- as.matrix(ld_matrix_3)
dimnames(ld_matrix_3) <- NULL
matrix_rotate <-
rotate.matrix(
x = ld_matrix_3,
angle = -45
)
matrix_rotate_2 <- matrix_rotate
matrix_rotate_2[matrix_rotate_2 == 0] <- NA
matrix_rotate_3 <-
t(zoo::na.locf(
t(matrix_rotate_2),
fromLast = FALSE,
na.rm = TRUE
))
means_col <- apply(matrix_rotate_3, 2, var, na.rm = TRUE)
means_col[means_col == 0] <- NA
means_col <- zoo::na.locf(means_col, fromLast = TRUE)
means_col <- means_col * 2
df_col <-
as.data.frame(matrix(ncol = 2, nrow = length(means_col)))
df_col[, 1] <- 1:nrow(df_col)
df_col[, 2] <- means_col
df_col[, 2] <-
scales::rescale(df_col[, 2], to = c(0, height_poly))
means_col_2 <- means_col
mean_column <- as.numeric(summary(means_col_2, na.rm = TRUE)[1:6])
mean_column <-
scales::rescale(mean_column, to = c(0, height_poly))
# as the matrix was rotated the position of the snps is not correct anymore.
# the following code reassigns the snp position based on the second row from
# bottom to top of the rotated matrix. the first two and the last snps are
# removed to take in account the snps that are not present in the second row
second_row_temp <- which(!is.na(matrix_rotate_3[, 1])) - 1
second_row <- second_row_temp[length(second_row_temp)]
second_row_2 <- matrix_rotate_2[second_row, ]
second_row_3 <- second_row_2
row_snp <- as.numeric(rownames(ld_snps))
reassign_loc <- row_snp[3:length(row_snp)]
reassign_loc <- reassign_loc[1:length(reassign_loc) - 1]
element_reassign_loc <- 1
for (loc in 1:length(second_row_2)) {
if (is.na(second_row_2[loc])) {
next
} else {
second_row_3[loc] <- reassign_loc[element_reassign_loc]
element_reassign_loc <- element_reassign_loc + 1
}
}
# putting back the first two snps and the last that were removed
second_row_3[1:2] <- row_snp[1:2]
second_row_3[length(second_row_3)] <- row_snp[length(row_snp)]
# filling the NAs
second_row_4 <-
zoo::na.locf(second_row_3, fromLast = TRUE, na.rm = FALSE)
second_row_4 <-
zoo::na.locf(second_row_4, fromLast = FALSE, na.rm = FALSE)
second_row_ver_2 <- zoo::na.locf(second_row_2, fromLast = TRUE)
first_row <- which(!is.na(matrix_rotate_3[, 1]))[1]
first_row_2 <- matrix_rotate_3[first_row, ]
first_row_2 <- round(first_row_2, 2)
# this is SNP's heterozygosity to be calculated alone
het_tmp <- utils.basic.stats(pop_ld)
snp_het_alone <-
data.frame(
position = pop_ld$position,
het = unname(het_tmp$Hs)
)
snp_het_alone$position <- 1:nrow(snp_het_alone)
snp_het_alone[, 1] <-
scales::rescale(snp_het_alone[, 1], to = c(0, width_poly))
snp_het_alone[, 2] <-
scales::rescale(snp_het_alone[, 2], to = c(0, height_poly))
# identifying haplotypes
if (any(first_row_2 > ld_threshold_haplo) &
haplo_id) {
haplo_loc_test <- first_row_2 >= ld_threshold_haplo
haplo_loc_test <- c(haplo_loc_test, FALSE)
start_haplo <- NULL
end_haplo <- NULL
for (i in 1:(length(haplo_loc_test) - 1)) {
if (haplo_loc_test[i] == haplo_loc_test[i + 1]) {
next()
}
if (haplo_loc_test[i] == TRUE) {
end_haplo_temp <- i
end_haplo <- c(end_haplo, end_haplo_temp)
}
if (haplo_loc_test[i] == FALSE) {
start_haplo_temp <- i
start_haplo <- c(start_haplo, start_haplo_temp)
}
}
start_haplo <- c(1, start_haplo)
start_haplo_2 <- second_row_4[start_haplo]
end_haplo_2 <- second_row_4[end_haplo]
if (length(start_haplo_2) != length(end_haplo_2)) {
start_haplo_2 <- start_haplo_2[-length(start_haplo_2)]
}
haplo_1_ver_2 <- as.data.frame(cbind(start_haplo_2, end_haplo_2))
haplo_1_ver_2$size <- (haplo_1_ver_2[, 2] - haplo_1_ver_2[, 1])
n_snps <- as.matrix(haplo_1_ver_2[, 1:2])
n_snps[, 1] <- n_snps[, 1] + 1
n_snps <- n_snps[which(n_snps[, 1] != n_snps[, 2]), ]
if (!is.matrix(n_snps)) {
n_snps <- as.matrix(t(n_snps))
}
n_snps <- n_snps[!duplicated(n_snps[, 1]), ]
if (!is.matrix(n_snps)) {
n_snps <- as.matrix(t(n_snps))
}
n_snps <- n_snps[!duplicated(n_snps[, 2]), ]
df.4.cut <-
as.data.frame(table(cut(row_snp, breaks = n_snps)),
stringsAsFactors =
FALSE
)
df.4.cut <- df.4.cut[which(df.4.cut$Freq >= min_snps), ]
if (nrow(df.4.cut) < 1) {
cat(warn(" No haplotypes with more than ", min_snps, "were found.
Try using a lower threshold.\n"))
next()
}
df.4.cut_3 <- gsub("[][()]", "", df.4.cut$Var1, ",")
df.4.cut_3 <- strsplit(df.4.cut_3, ",")
df.4.cut_4 <- lapply(df.4.cut_3, as.numeric)
if (length(df.4.cut_4) == 1) {
df.4.cut_4 <- data.frame(t(matrix((df.4.cut_4[[1]]))))
} else {
df.4.cut_4 <- as.data.frame(plyr::laply(df.4.cut_4, rbind))
}
df.4.cut_4[, 3] <- (df.4.cut_4[, 2] - df.4.cut_4[, 1])
# this is to calculate the real distance in bp of the polygon figure of LD
real_distance <- c(0, second_row_4)
real_distance_2 <- diff(real_distance)
real_distance_3 <- cumsum(real_distance_2)
real_distance_4 <-
as.data.frame(cbind(1:length(real_distance_3), real_distance_3))
test_var <- unname(unlist(df.4.cut_4[, 1:2]))
location_test <-
lapply(test_var, findInterval, vec = as.numeric(paste(
unlist(real_distance_4$real_distance_3)
)))
location_test_2 <- unlist(location_test)
test_var_2 <-
as.data.frame(cbind(1:length(location_test_2), location_test_2))
hap_blocks <-
as.data.frame(cbind(
paste0("CHR_", 1:nrow(df.4.cut_4)),
rep(1, times = nrow(df.4.cut_4)),
df.4.cut_4[, 1:3],
df.4.cut$Freq
))
colnames(hap_blocks) <-
c("BLOCK", "CHR", "BP1", "BP2", "SIZE", "NSNP")
locations_temp <-
as.data.frame(cbind(hap_blocks$BP1, hap_blocks$BP2))
locations_temp_2 <- locations_temp
colnames(locations_temp_2) <- c("start", "end")
locations_temp_2$start_ld_plot <-
unlist(lapply(locations_temp_2$start, findInterval, vec = as.numeric(paste(
unlist(real_distance_4$real_distance_3)
))))
locations_temp_2$end_ld_plot <-
unlist(lapply(locations_temp_2$end, findInterval, vec = as.numeric(paste(
unlist(real_distance_4$real_distance_3)
))))
locations_temp_2$midpoint <-
(locations_temp_2$start + locations_temp_2$end) / 2
locations_temp_2$midpoint_ld_plot <-
(locations_temp_2$start_ld_plot + locations_temp_2$end_ld_plot) / 2
locations_temp_2$labels <-
paste0(
as.character(round(locations_temp_2$start / 1000000, 0)), "-",
as.character(round(locations_temp_2$end / 1000000, 0))
)
haplo_temp_a <-
locations_temp_2[which(as.numeric(row.names(locations_temp_2)) %% 2 == 1), ]
haplo_temp_b <-
locations_temp_2[which(as.numeric(row.names(locations_temp_2)) %% 2 == 0), ]
ticks_breaks <-
c(
locations_temp_2$start_ld_plot,
locations_temp_2$end_ld_plot
)
ticks_breaks <- ticks_breaks[order(ticks_breaks)]
ticks_lab <- c(locations_temp_2$start, locations_temp_2$end)
ticks_lab <- round(ticks_lab / 1000000, 0)
ticks_lab <- as.character(ticks_lab[order(ticks_lab)])
ticks_joint <- as.data.frame(cbind(ticks_breaks, ticks_lab))
ticks_joint <- ticks_joint[!duplicated(ticks_joint$ticks_lab), ]
ticks_joint$ticks_lab <- as.character(ticks_joint$ticks_lab)
ticks_joint$ticks_breaks <-
as.numeric(as.character(ticks_joint$ticks_breaks))
colors_plot <-
c(
"Heterozygosity" = color_het,
"Haplotypes limits" = "lightgoldenrod3"
)
labels_haplo <- as.character(1:nrow(locations_temp_2))
# for an unknown reason, the name of the fill variable in the geom_polygon
# changes between Freq and layer. So, when there is an error in displaying
# the graphic, the name of this variable has to be changed for it to work
haplo_table_tmp <- rbind(haplo_temp_a, haplo_temp_b)
haplo_table_tmp <- cbind(
haplotype = as.numeric(rownames(haplo_table_tmp)),
haplo_table_tmp
)
haplo_table_tmp <- haplo_table_tmp[order(haplo_table_tmp$haplotype), ]
haplo_table_tmp <- cbind(chromosome = chr_name, haplo_table_tmp)
haplo_table_tmp <- cbind(population = pop_name, haplo_table_tmp)
haplo_table <- rbind(haplo_table, haplo_table_tmp)
p_temp <- NULL
p_temp <- ggplot() +
geom_rect(
aes(
xmin = haplo_temp_a$start_ld_plot,
xmax = haplo_temp_a$end_ld_plot,
ymin = min(polygon_haplo.df$lat) - 30,
ymax = max(polygon_haplo.df$lat) + 30
),
color = "cornsilk3",
fill = "cornsilk3"
) +
geom_rect(
aes(
xmin = haplo_temp_b$start_ld_plot,
xmax = haplo_temp_b$end_ld_plot,
ymin = min(polygon_haplo.df$lat) - 30,
ymax = max(polygon_haplo.df$lat) + 30
),
color = "cornsilk4",
fill = "cornsilk4"
) +
geom_polygon(
data = polygon_haplo.df,
aes(long, lat,
group = group,
fill = polygon_haplo.df[, "Freq"]
)
) +
viridis::scale_fill_viridis(name = ld_stat, option = color_haplo) +
geom_vline(aes(
xintercept = c(
haplo_temp_b$start_ld_plot,
haplo_temp_b$end_ld_plot,
haplo_temp_a$start_ld_plot,
haplo_temp_a$end_ld_plo
),
color = "Haplotypes limits"
), linewidth = 1) +
geom_line(
data = snp_het_alone,
aes(x = position, y = het, color = "Heterozygosity"),
inherit.aes = FALSE,
linewidth = 1 / 2,
alpha = 1
) +
annotate(
"text",
x = locations_temp_2$midpoint_ld_plot,
y = max(polygon_haplo.df$lat) + 13,
label = labels_haplo,
size = 3,
color = "black"
) +
annotate(
"text",
x = locations_temp_2$midpoint_ld_plot,
y = min(polygon_haplo.df$lat) - 13,
label = locations_temp_2$labels,
size = 3,
color = "black"
) +
labs(
x = "Chromosome location (Mbp)",
y = "Het",
title = paste("Population", pop_name, "Chromosome", chr_name, "-", nLoc(pop_ld), "SNPs")
) +
scale_x_continuous(
breaks = ticks_joint$ticks_breaks,
labels = ticks_joint$ticks_lab
) +
scale_colour_manual(name = "", values = colors_plot) +
theme_void() +
theme(
legend.position = "top",
legend.text = element_text(size = 10),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_text(hjust = 0.5),
axis.ticks.x = element_blank(),
axis.text.x = element_blank()
) +
coord_fixed(ratio = 1 / 1)
# p <- c(p,p_temp)
# p[[pop_n]][[chrom]] <- p_temp
# # PRINTING OUTPUTS
if (plot.out) {
print(p_temp)
}
} else {
cat(warn(" No haplotypes were identified for chromosome", chr_name, "\n"))
colors_plot <- c("Heterozygosity" = color_het)
p_pos <- snp <- p_temp <- NULL
p_temp <- ggplot() +
geom_polygon(
data = polygon_haplo.df,
aes(long, lat,
group = group,
fill = polygon_haplo.df[, "Freq"]
)
) +
viridis::scale_fill_viridis(name = ld_stat, option = color_haplo) +
geom_line(
data = snp_het_alone,
aes(x = position, y = het, color = "Heterozygosity"),
inherit.aes = FALSE,
linewidth = 1 / 2,
alpha = 1
) +
labs(
x = "Chromosome location (Mbp)",
y = "Het",
title = paste("Population", pop_name, "Chromosome", chr_name, "-", length(pop_ld$position), "SNPs")
) +
scale_colour_manual(name = "", values = colors_plot) +
theme_void() +
theme(
legend.position = "top",
legend.text = element_text(size = 10),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank()
) +
coord_fixed(ratio = 1 / 1)
snp <- pop_ld$position
snp <- snp[order(snp)]
snp <- data.frame(snp=snp)
colnames(snp) <- "snp"
snp$order <- 1:nrow(snp)
snp$scale <- scales::rescale(snp$snp, to = c(1,length(pop_ld$position)))
snp1 <-snp
snp1$y <- 2
snp2 <- snp
snp2$y <- 1
snp2$order <- snp1$scale
snp_fin <- rbind(snp1,snp2)
labels_tmp <- c(seq(1,max(pop_ld$position),max(pop_ld$position)/10),max(pop_ld$position))
labels_plot <- round(labels_tmp,-6)/1000000
breaks_plot <- scales::rescale(labels_plot,to = c(1,length(pop_ld$position)) )
snp_fin$colorL <- "black"
if(!is.null(target_snp)){
snp_pos <- target_snp
snp_fin[which(snp_fin$snp %in% target_snp),"colorL"] <- "red"
}
y<- NA
p_pos <- ggplot(snp_fin, aes(x = order, y = y, group = snp)) +
geom_line(color= snp_fin$colorL) +
theme(
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA),
panel.grid = element_blank(),
panel.border = element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "null"),
panel.spacing = unit(c(0, 0, 0, 0), "null"),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
legend.position = "none"
)+
scale_x_continuous(breaks =breaks_plot ,
labels =labels_plot ) +
labs(
x = "Chromosome location (Mbp)")
layout <- c(
area(t = 1, l = 1, b = 5, r = 1),
area(t = 4, l = 1, b = 6, r = 1)
)
p_temp <- p_temp / p_pos +
plot_layout(design = layout)
# p <- c(p,p_temp)
# p[[pop_n]][[chrom]] <- p_temp
# PRINTING OUTPUTS
if (plot.out) {
print(p_temp)
# print(p)
}
file.name <- paste0(plot.dir,"/",pop_name, "_", chr_name,".pdf")
ggsave(filename = file.name,
plot = p_temp,
width = 15,
height = 5,
units = "in",
dpi="retina",
bg = "transparent",
limitsize = FALSE)
if (!is.null(plot.save)) {
# utils.plot.save(p_temp,
# dir = plot.dir,
# file = filename,
# verbose = verbose
# )
file.name <- paste0(plot.dir,"/",pop_name, "_", chr_name,".pdf")
ggsave(filename = file.name,
width = 15,
height = 5,
units = "in",
dpi="retina",
bg = "transparent",
limitsize = FALSE)
}
}
}
}
# # PRINTING OUTPUTS
# if (plot.out) {
# print(p)
# }
haplo_table <- haplo_table[-1, ]
print(haplo_table, row.names = FALSE)
# Optionally save the plot ---------------------
# if (!is.null(plot.file)) {
# tmp <- utils.plot.save(p,
# dir = plot.dir,
# file = plot.file,
# verbose = verbose
# )
# }
# FLAG SCRIPT END
if (verbose >= 1) {
cat(report("Completed:", funname, "\n"))
}
# RETURN
return(invisible(haplo_table))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.