knitr::opts_chunk$set(echo = TRUE, message=FALSE)

Preliminaries

library(GenomicRanges) # for class GRange
library(rtracklayer) # for function import.gff
library(knitr) # for function kable
if (!require("csaw")) {
  BiocManager::install("csaw")
}

Helper Function

get_annotated_regions_from_gff <- function(file_name) {
gff <- rtracklayer::import.gff(file_name)
as(gff, "GRanges")
}

Downloading Data

# original: file.path(getwd(), "datasets", "ch1", "windows.bam"),
urldir <- "https://raw.githubusercontent.com/PacktPublishing/R-Bioinformatics-Cookbook/master/datasets/Chapter01"
bamname <- "windows.bam"
bainame <- "windows.bam.bai"
if (!file.exists(bamname)) {
  download.file(file.path(urldir,bamname),bamname)
}
if (!file.exists(bainame)) {
  download.file(file.path(urldir,bainame),bainame)
}
annoname <- "genes.gff"
annourl <- file.path(urldir, "genes.gff")
if (!file.exists(annoname)) {
  download.file(annourl, annoname)
}
annotated_regions <- get_annotated_regions_from_gff(annoname)
kable(head(annotated_regions))

Reading File

whole_genome <- csaw::windowCounts(
bamname,
bin = TRUE,
filter = 0,
width = 500,
param = csaw::readParam(
minq = 20,
dedup = TRUE,
pe = "both"
)
)

Find Overlaps with Existing Annotations

library(IRanges)
library(SummarizedExperiment)
windows_in_genes <-IRanges::overlapsAny(
SummarizedExperiment::rowRanges(whole_genome), annotated_regions )
windows_in_genes

Distinguish Annotated and non-Annotated Regions

annotated_window_counts <- whole_genome[windows_in_genes,]
non_annotated_window_counts <- whole_genome[ ! windows_in_genes,]

Looking at Data That Corresponds to Non-annotated Regions

head(non_annotated_window_counts)
colData(non_annotated_window_counts)


eckartbindewald/bfxapps2 documentation built on Feb. 6, 2025, 3:22 a.m.