###
format_txdb_dump <- GenomicFeatures:::.format_txdb_dump
test_makeTxDbFromGRanges_empty <- function()
{
## The GFF3 way
gr1 <- GRanges(type=character(0), ID=character(0))
txdb1 <- makeTxDbFromGRanges(gr1)
checkTrue(validObject(txdb1))
checkTrue(all(elementNROWS(as.list(txdb1)) == 0L))
checkIdentical(0L, length(seqinfo(txdb1)))
checkIdentical(NA_character_, organism(txdb1))
## The GTF way
gr2 <- GRanges(type=character(0),
gene_id=character(0), transcript_id=character(0))
txdb2 <- makeTxDbFromGRanges(gr2)
checkIdentical(as.list(txdb1), as.list(txdb2))
}
test_makeTxDbFromGRanges_no_exons_no_cds <- function()
{
## 2 transcripts linked to a gene + 2 orphan transcripts
gene_ID <- "gene1"
gene_start <- 101
gene_end <- 160
tx_ID <- c( "tx1a", "tx2", "tx1b", "tx3")
tx_Parent <- c("gene1", NA, "gene1", NA)
tx_start <- c( 151, 201, 101, 5)
tx_end <- c( 160, 250, 120, 35)
gene_gr <- GRanges("chr1", IRanges(gene_start, gene_end), "+",
type="gene", ID=gene_ID, Parent=NA)
tx_gr <- GRanges("chr1", IRanges(tx_start, tx_end), "+",
type="mRNA", ID=tx_ID, Parent=tx_Parent)
gr <- c(gene_gr, tx_gr)
target_transcripts <- data.frame(
tx_id=1:4,
tx_name=c("tx3", "tx1b", "tx1a", "tx2"),
tx_type="mRNA",
tx_chrom="chr1",
tx_strand="+",
tx_start=c(5, 101, 151, 201),
tx_end=c(35, 120, 160, 250)
)
target_splicings <- data.frame(
tx_id=target_transcripts$tx_id,
exon_rank=1,
exon_id=1:4,
exon_name=target_transcripts$tx_name,
exon_chrom=target_transcripts$tx_chrom,
exon_strand=target_transcripts$tx_strand,
exon_start=target_transcripts$tx_start,
exon_end=target_transcripts$tx_end
)
target_genes <- data.frame(
tx_id=2:3,
gene_id="gene1"
)
target_chrominfo <- data.frame(
chrom="chr1",
length=NA,
is_circular=NA
)
target_txdb_dump <- format_txdb_dump(transcripts=target_transcripts,
splicings=target_splicings,
genes=target_genes,
chrominfo=target_chrominfo)
current_txdb <- makeTxDbFromGRanges(gr)
checkIdentical(target_txdb_dump, as.list(current_txdb))
## make Parent a CharacterList instead of an atomic vector
mcols(gr)$Parent <- CharacterList(character(0))
mcols(gr)$Parent[c(2L, 4L)] <- "gene1"
current_txdb <- makeTxDbFromGRanges(gr)
checkIdentical(target_txdb_dump, as.list(current_txdb))
## 4 orphan transcripts
gr4 <- gr[mcols(gr)$type != "gene"] # remove the gene
target_txdb_dump <- format_txdb_dump(transcripts=target_transcripts,
splicings=target_splicings,
chrominfo=target_chrominfo)
current_txdb <- makeTxDbFromGRanges(gr4)
checkIdentical(target_txdb_dump, as.list(current_txdb))
## 2 orphan transcripts
gr2 <- gr[c(3L, 5L)]
target_transcripts2 <- target_transcripts[c(1,4), ]
target_transcripts2$tx_id <- 1:2
target_splicings2 <- target_splicings[c(1,4), ]
target_splicings2$tx_id <- 1:2
target_splicings2$exon_id <- 1:2
target_txdb_dump <- format_txdb_dump(transcripts=target_transcripts2,
splicings=target_splicings2,
chrominfo=target_chrominfo)
current_txdb <- makeTxDbFromGRanges(gr2)
checkIdentical(target_txdb_dump, as.list(current_txdb))
## 1 gene with no children
gr3 <- gr[-c(2L, 4L)]
current_txdb <- makeTxDbFromGRanges(gr3)
checkIdentical(target_txdb_dump, as.list(current_txdb))
## 0 transcript
gr0 <- gr[0]
target_txdb_dump <- format_txdb_dump(chrominfo=target_chrominfo)
current_txdb <- makeTxDbFromGRanges(gr0)
checkIdentical(target_txdb_dump, as.list(current_txdb))
}
test_makeTxDbFromGRanges_with_exons_and_cds <- function()
{
## 2 exons for tx1a, 1 exon for tx1b, 0 exon for tx2, 3 exons for tx3
tx_ID <- c( "tx1a", "tx1b", "tx2", "tx3")
tx_start <- c( 101, 145, 201, 5)
tx_end <- c( 160, 160, 250, 35)
exon_Parent <- rep.int(tx_ID, c(2L, 1L, 0L, 3L))
exon_start <- c(101, 145, 145, 5, 18, 31)
exon_end <- c(134, 160, 160, 12, 28, 35)
tx_gr <- GRanges("chr1", IRanges(tx_start, tx_end), "+",
type="mRNA", ID=tx_ID, Parent=NA)
exon_gr <- GRanges("chr1", IRanges(exon_start, exon_end), "+",
type="exon", ID=NA, Parent=exon_Parent)
gr <- c(tx_gr, exon_gr)
tx_oo <- c(4L, 1:3)
target_transcripts <- data.frame(
tx_id=1:4,
tx_name=tx_ID[tx_oo],
tx_type="mRNA",
tx_chrom="chr1",
tx_strand="+",
tx_start=tx_start[tx_oo],
tx_end=tx_end[tx_oo]
)
exon_oo <- c(4:6, 1:3)
target_splicings <- data.frame(
tx_id=c(rep(1:3, 3:1), 4),
exon_rank=c(1:3, 1:2, 1, 1),
exon_id=c(1:5, 5:6),
exon_name=c(rep(NA, 6), "tx2"),
exon_chrom="chr1",
exon_strand="+",
exon_start=c(exon_start[exon_oo], 201),
exon_end=c(exon_end[exon_oo], 250)
)
target_chrominfo <- data.frame(
chrom="chr1",
length=NA,
is_circular=NA
)
target_txdb_dump <- format_txdb_dump(transcripts=target_transcripts,
splicings=target_splicings,
chrominfo=target_chrominfo)
current_txdb <- makeTxDbFromGRanges(gr)
checkIdentical(target_txdb_dump, as.list(current_txdb))
## naming exons thru ID
exon_ID <- paste0("ex", seq_along(exon_gr))
mcols(exon_gr)$ID <- exon_ID
gr <- c(tx_gr, exon_gr)
target_splicings$exon_id <- 1:7
target_splicings$exon_name <- c(exon_ID[exon_oo], "tx2")
target_txdb_dump <- format_txdb_dump(transcripts=target_transcripts,
splicings=target_splicings,
chrominfo=target_chrominfo)
current_txdb <- makeTxDbFromGRanges(gr)
checkIdentical(target_txdb_dump, as.list(current_txdb))
## adding CDS
cds_Parent <- rep.int(tx_ID, c(2L, 1L, 0L, 2L))
cds_start <- c(122, 145, 149, 21, 31)
cds_end <- c(134, 152, 157, 28, 34)
cds_gr <- GRanges("chr1", IRanges(cds_start, cds_end), "+",
type="CDS", ID=NA, Parent=cds_Parent)
gr <- c(gr, cds_gr)
cds_oo <- c(NA, 4:5, 1:3)
target_splicings <- cbind(
target_splicings,
cds_id=c(NA, 1:5, NA),
cds_start=c(cds_start[cds_oo], NA),
cds_end=c(cds_end[cds_oo], NA)
)
target_txdb_dump <- format_txdb_dump(transcripts=target_transcripts,
splicings=target_splicings,
chrominfo=target_chrominfo)
current_txdb <- makeTxDbFromGRanges(gr)
checkIdentical(target_txdb_dump, as.list(current_txdb))
## with exons not separated by introns
exon_end[c(1, 5)] <- c(144, 36)
end(exon_gr) <- exon_end
gr <- c(tx_gr, exon_gr)
tx_oo <- 1:3
target_transcripts <- data.frame(
tx_id=1:3,
tx_name=tx_ID[tx_oo],
tx_type="mRNA",
tx_chrom="chr1",
tx_strand="+",
tx_start=tx_start[tx_oo],
tx_end=tx_end[tx_oo]
)
exon_oo <- 1:3
target_splicings <- data.frame(
tx_id=c(1, 1, 2, 3),
exon_rank=c(1, 2, 1, 1),
exon_id=1:4,
exon_name=c(exon_ID[exon_oo], "tx2"),
exon_chrom="chr1",
exon_strand="+",
exon_start=c(exon_start[exon_oo], 201),
exon_end=c(exon_end[exon_oo], 250)
)
target_txdb_dump <- format_txdb_dump(transcripts=target_transcripts,
splicings=target_splicings,
chrominfo=target_chrominfo)
current_txdb <- suppressWarnings(makeTxDbFromGRanges(gr))
checkIdentical(target_txdb_dump, as.list(current_txdb))
}
test_makeTxDbFromGRanges_with_multiple_part_transcripts <- function()
{
## 1 gene with 2 transcripts + 1 gene with 1 transcript
gene_ID <- c("gene1", "gene2")
gene_start <- c( 101, 5)
gene_end <- c( 160, 35)
tx_ID <- c( "tx1a", "tx1a", "tx1b", "tx2", "tx2", "tx2")
tx_Parent <- c("gene1", "gene1", "gene1", "gene2", "gene2", "gene2")
tx_start <- c( 145, 101, 145, 31, 5, 18)
tx_end <- c( 160, 134, 160, 35, 12, 28)
gene_gr <- GRanges("chr1", IRanges(gene_start, gene_end), "+",
type="gene", ID=gene_ID, Parent=NA)
tx_gr <- GRanges("chr1", IRanges(tx_start, tx_end), "+",
type="mRNA", ID=tx_ID, Parent=tx_Parent)
gr <- c(gene_gr, tx_gr)
target_transcripts <- data.frame(
tx_id=1:3,
tx_name=c("tx2", "tx1a", "tx1b"),
tx_type="mRNA",
tx_chrom="chr1",
tx_strand="+",
tx_start=c(5, 101, 145),
tx_end=c(35, 160, 160)
)
target_splicings <- data.frame(
tx_id=target_transcripts$tx_id,
exon_rank=1,
exon_id=1:3,
exon_name=target_transcripts$tx_name,
exon_chrom=target_transcripts$tx_chrom,
exon_strand=target_transcripts$tx_strand,
exon_start=target_transcripts$tx_start,
exon_end=target_transcripts$tx_end
)
target_genes <- data.frame(
tx_id=1:3,
gene_id=rep.int(c("gene2", "gene1"), c(1, 2))
)
target_chrominfo <- data.frame(
chrom="chr1",
length=NA,
is_circular=NA
)
target_txdb_dump <- format_txdb_dump(transcripts=target_transcripts,
splicings=target_splicings,
genes=target_genes,
chrominfo=target_chrominfo)
current_txdb <- suppressWarnings(makeTxDbFromGRanges(gr))
checkIdentical(target_txdb_dump, as.list(current_txdb))
## adding 1 exon per transcript part
exon_gr <- GRanges("chr1", IRanges(tx_start, tx_end), "+",
type="exon", ID=NA, Parent=tx_ID)
gr <- c(gr, exon_gr)
exon_oo <- c(5:6, 4, 2, 1, 3)
target_splicings <- data.frame(
tx_id=rep.int(1:3, 3:1),
exon_rank=c(1:3, 1:2, 1),
exon_id=c(1:5, 5),
exon_chrom="chr1",
exon_strand="+",
exon_start=tx_start[exon_oo],
exon_end=tx_end[exon_oo]
)
target_txdb_dump <- format_txdb_dump(transcripts=target_transcripts,
splicings=target_splicings,
genes=target_genes,
chrominfo=target_chrominfo)
current_txdb <- suppressWarnings(makeTxDbFromGRanges(gr))
checkIdentical(target_txdb_dump, as.list(current_txdb))
}
test_makeTxDbFromGRanges_with_exons_and_cds_as_gene_direct_children <-
function()
{
## 1 gene with 2 transcripts + 1 gene with 3 exons as direct children +
## 1 gene with 2 CDS as direct children
gene_ID <- c("gene1", "gene2", "gene3")
gene_start <- c( 101, 5, 325)
gene_end <- c( 160, 35, 370)
tx_ID <- c( "tx1a", "tx1b")
tx_Parent <- c("gene1", "gene1")
tx_start <- c( 101, 145)
tx_end <- c( 160, 160)
exon_ID <- c( "ex1", "ex2", "ex3", "ex4", "ex5", "ex6")
exon_Parent <- c("tx1a", "tx1a", "tx1b", "gene2", "gene2", "gene2")
exon_start <- c( 145, 101, 145, 31, 5, 18)
exon_end <- c( 160, 134, 160, 35, 12, 28)
cds_Parent <- c("gene3", "gene3")
cds_start <- c( 351, 325)
cds_end <- c( 370, 340)
gene_gr <- GRanges("chr1", IRanges(gene_start, gene_end), "+",
type="gene", ID=gene_ID, Parent=NA)
tx_gr <- GRanges("chr1", IRanges(tx_start, tx_end), "+",
type="mRNA", ID=tx_ID, Parent=tx_Parent)
exon_gr <- GRanges("chr1", IRanges(exon_start, exon_end), "+",
type="exon", ID=exon_ID, Parent=exon_Parent)
cds_gr <- GRanges("chr1", IRanges(cds_start, cds_end), "+",
type="CDS", ID=NA, Parent=cds_Parent)
gr <- c(gene_gr, tx_gr, exon_gr, cds_gr)
target_transcripts <- data.frame(
tx_id=1:4,
tx_name=c("gene2", "tx1a", "tx1b", "gene3"),
tx_type=c("gene", "mRNA", "mRNA", "gene"),
tx_chrom="chr1",
tx_strand="+",
tx_start=c(5, 101, 145, 325),
tx_end=c(35, 160, 160, 370)
)
exon_oo <- c(5:6, 4, 2, 1, 3)
cds_oo <- 2:1
target_splicings <- data.frame(
tx_id=rep.int(1:4, c(3:1, 2)),
exon_rank=c(1:3, 1:2, 1, 1:2),
exon_id=1:8,
exon_name=c(exon_ID[exon_oo], NA, NA),
exon_chrom="chr1",
exon_strand="+",
exon_start=c(exon_start[exon_oo], cds_start[cds_oo]),
exon_end=c(exon_end[exon_oo], cds_end[cds_oo]),
cds_id=c(rep.int(NA, length(exon_oo)), 1, 2),
cds_start=c(rep.int(NA, length(exon_oo)), cds_start[cds_oo]),
cds_end=c(rep.int(NA, length(exon_oo)), cds_end[cds_oo])
)
target_genes <- data.frame(
tx_id=1:4,
gene_id=rep.int(c("gene2", "gene1", "gene3"), c(1, 2, 1))
)
target_chrominfo <- data.frame(
chrom="chr1",
length=NA,
is_circular=NA
)
target_txdb_dump <- format_txdb_dump(transcripts=target_transcripts,
splicings=target_splicings,
genes=target_genes,
chrominfo=target_chrominfo)
current_txdb <- makeTxDbFromGRanges(gr)
checkIdentical(target_txdb_dump, as.list(current_txdb))
## Shuffling the input should not change anything.
for (i in 1:5) {
current_txdb <- makeTxDbFromGRanges(sample(gr))
checkIdentical(target_txdb_dump, as.list(current_txdb))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.