#' @title Filter a gene set and extract upstream or downstream distances
#' do their neighbors
#' @description Extract distances to the upstream or downstream neighbors
#'
#' @param GNN A \code{tibble} obtained with the
#' \code{\link{getGeneNeighborhood}} function.
#' Can also be any data.frame with the relevant information.
#' @param glist A character vector of gene IDs (must match GNN$GeneName)
#' @param Side One of 'Upstream' or 'Downstream'
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr filter select rename slice mutate transmute pull
#' group_by summarise bind_rows one_of
#' @importFrom rlang .data !!
#'
#' @export
#'
#' @return A tibble with the following columns:
#' \itemize{
#' \item GeneName. ID of the focus gene.
#' \item Neighbor. ID of the gene neighbor
#' \item Orientation. Orientation of the neighbor (same or opposite strand).
#' \item Side. Upstream or Downstream.
#' \item Distance in bp
#' }
#'
#' @section DETAILS:
#' Note that the function removes overlapping upstream/downstream genes
#' but not adjacent genes (for which distance = 0).
#'
#' @seealso \code{\link{getGeneNeighborhood}}
#' \code{\link{dist2Neighbors}}
#'
#' @examples
#' ## Obtain gene neighborhood information:
#' GeneNeighbors <- getGeneNeighborhood(Genegr)
#' ## Get a (random) set of (10) genes:
#' set.seed(123)
#' randGenes <- sample(names(Genegr), 10)
#' ## Extract distances to the upstream neighbors for this set:
#' getDistSide(GeneNeighbors, randGenes, "Upstream")
#' ## And for the downstream genes:
#' getDistSide(GeneNeighbors, randGenes, "Downstream")
getDistSide <- function(GNN,
glist,
Side = c("Upstream", "Downstream")) {
Side <- match.arg(Side)
className <- paste0(Side, "Class")
DistName <- paste0(Side, "Distance")
selectedColumns <- c("GeneName",
Side,
className,
DistName)
GNNdata <- GNN %>%
dplyr::select(dplyr::one_of(selectedColumns)) %>%
dplyr::rename("SideGene" = !!Side,
"SideClass" = !!className,
"Distance" = !!DistName)
#Remove gene that have no upstream/downstream data (i.e. class=="other")
hasSide <- GNNdata %>%
dplyr::slice(match(glist, .data$GeneName)) %>%
dplyr::transmute(hasSide = !is.na(.data$SideGene) &
!is.na(.data$Distance)) %>%
dplyr::pull(hasSide)
if (any(!hasSide)) {
message(sum(!hasSide), paste0(" genes with undefined ",
tolower(Side),
" gene are removed from Geneset"))
}
glistSel <- glist[hasSide]
##Remove genes with overlapping upstream/downstream gene
isOVL <- GNNdata %>%
dplyr::slice(match(glistSel, .data$GeneName)) %>%
dplyr::transmute(isOVL = grepl("overlap",
tolower(.data$SideClass)) &
.data$Distance == 0) %>%
dplyr::pull(isOVL)
if (any(isOVL)) {
message(sum(isOVL),
paste0(" genes with an overlapping ",
tolower(Side),
" gene are removed from Geneset"))
glistSel <- glistSel[!isOVL]
}
##Count the remaining genes
ngSel <- length(glistSel) #Number of genes with Upstream or Downstream info
message("Final number of genes with ",
tolower(Side),
" gene data: ",
ngSel,
" genes")
##Extract distances
res <- GNNdata %>%
dplyr::filter(.data$GeneName %in% glistSel) %>%
dplyr::rename("Neighbor" = "SideGene", "Orientation" = "SideClass") %>%
dplyr::mutate("Side" = Side)
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.