Nothing
#' Duplicate SNPs found in overlapping gene pairs.
#'
#' @param bin_mat Matrix. Binary matrix.
#' @param gff_mat Matrix. GFF information.
#' @noRd
#' @return bin_mat_dup: Matrix.
dup_snps_in_overlapping_genes <- function(bin_mat, gff_mat){
check_is_this_class(bin_mat, "matrix")
check_is_this_class(gff_mat, "matrix")
check_if_binary_matrix(bin_mat)
if (ncol(gff_mat) != 9) {
stop("GFF matrix must have exactly 9 columns")
}
# position of SNP
pos <- gsub("[.].*$", "", row.names(bin_mat))
# what genes are present at the pos?
genes_at_pos <- lapply(pos, function(p) {
gff_mat[gff_mat[, 4] <= p & gff_mat[, 5] >= p, 9]
})
# number of genes per position
num_of_genes_at_pos <- sapply(genes_at_pos, length)
# indices of bin_mat (number of rows)
indices <- 1:nrow(bin_mat)
# row names: pos.num|gene_name
# there will be a .num. if its a multiallelic site
pos_dup_gene <- paste(rep(row.names(bin_mat), num_of_genes_at_pos),
unlist(genes_at_pos),
sep = "|")
# duplicate overlapping genes
bin_mat_dup <- bin_mat[rep(indices, num_of_genes_at_pos), ]
row.names(bin_mat_dup) <- pos_dup_gene
return(bin_mat_dup)
}
#' Duplicate SNPs found in overlapping gene pairs when there are snpeff
#' annotations in the vcf file.
#'
#' @param bin_mat Matrix. Binary matrix.
#' @param predicted_impact vector of impacts, separated by pipes if pos in
#' overlapping gene
#' @param gene vector of genes, separated by pipes if pos in overlapping gene
#' @noRd
#' @return
#' bin_mat_dup. Matrix.
#' predicted_impact_split. Character vector.
#'
dup_snps_in_overlapping_genes_snpeff <- function(bin_mat,
predicted_impact,
gene) {
check_is_this_class(bin_mat, "matrix")
check_if_binary_matrix(bin_mat)
check_is_this_class(gene, "character")
check_is_this_class(predicted_impact, "character")
check_num_overlap_genes_match_num_impact(predicted_impact, gene)
# number of genes per position
num_ofgenes_at_pos <- sapply(gene, function(g) {
length(unlist(strsplit(g, "[|]")))
})
# get gene names
genes_at_pos <- sapply(gene, function(g) {
unlist(strsplit(g, "[|]"))
})
# snpeff impacts
predicted_impact_split <- unname(unlist(sapply(predicted_impact, function(g) {
unlist(strsplit(g, "[|]"))
})))
# indices of bin_mat (number of rows)
indices <- 1:nrow(bin_mat)
# row names: pos.num|gene_name
# there will be a .num. if its a multiallelic site
pos_dup_gene <- paste(rep(row.names(bin_mat), num_ofgenes_at_pos),
unlist(genes_at_pos),
sep = "|")
# duplicate overlapping genes
bin_mat_dup <- bin_mat[rep(indices, num_ofgenes_at_pos), ]
row.names(bin_mat_dup) <- pos_dup_gene
return(list(bin_mat_dup = bin_mat_dup,
predicted_impact_split = predicted_impact_split))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.