# Download the gzip-compressed T2T-CHM13 v2.0 RefSeq/Liftoff GFF3 annotation
# into a temporary file that the rest of this script reads from.
gff_url <- paste0(
  "https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/",
  "assemblies/annotation/chm13v2.0_RefSeq_Liftoff_v5.2.gff3.gz"
)

# Keep the ".gz" suffix so readers that choose decompression from the file
# name still recognise the file as gzip.
temp_path <- tempfile(fileext = ".gff3.gz")

# mode = "wb": the payload is binary; a text-mode transfer would corrupt it on
# Windows. download.file() returns 0 on success, so fail loudly otherwise
# instead of parsing a partial or missing file later on.
download_status <- download.file(gff_url, temp_path, mode = "wb")
if (download_status != 0L) {
  stop(
    "Download failed with status ", download_status, ": ", gff_url,
    call. = FALSE
  )
}
# Names for the nine tab-separated GFF3 columns, in file order. They are passed
# to the reader as `col_names`.
gff_col_names <- c(
  "chr", "source", "type",
  "start", "end", "score",
  "strand", "phase", "attributes"
)
# Parse the downloaded GFF3 into a tibble, once. Column names are supplied
# explicitly via `gff_col_names`, and `comment = "#"` makes the reader ignore
# text introduced by "#" (such as the "##" directive lines).
# The original script ran this identical call twice in a row; the second run
# only re-parsed the same file and overwrote `anno_t2t` with the same data.
anno_t2t <- read_tsv(
  temp_path,
  col_names = gff_col_names,
  comment = "#"
)
# Keep only exon features and pull three identifiers out of the semicolon-
# separated `attributes` column. Each extraction yields NA when its pattern
# does not match.
exon_anno_t2t <- anno_t2t %>%
  filter(type == "exon") %>%
  mutate(
    # NOTE(review): `\\w+` stops at the first non-word character, so a value
    # such as "NM_000014.6" is captured as "NM_000014" and a prefixed value
    # such as "rna-XYZ" as "rna". Left unchanged because dropping the version
    # suffix may be intended -- confirm against the actual file contents.
    transcript_id = str_extract(attributes, "Parent=(\\w+)", group = 1),
    gene_id = str_extract(attributes, "GeneID:(\\w+)", group = 1),
    # Capture the whole attribute value up to the next ";" so that symbols
    # containing "-" or "." (e.g. "HLA-A") are not truncated, and anchor on
    # the start of an attribute so only the `gene` key itself is matched.
    symbol = str_extract(attributes, "(?:^|;)gene=([^;]+)", group = 1)
  )
# Shape the exon table for export: keep the seven columns below in this order,
# store identifiers as factors and coordinates as integers, then drop every row
# that has an NA in any of the retained columns.
exon_anno_t2t_formatted <- exon_anno_t2t %>%
  select(gene_id, chr, strand, start, end, transcript_id, symbol) %>%
  mutate(
    across(c(gene_id, chr, strand, transcript_id, symbol), factor),
    across(c(start, end), as.integer)
  ) %>%
  drop_na()
# Write the formatted exon table as an xz-compressed RDS file, then delete the
# temporary download created at the top of the script.
anno_name <- "inst/exons_t2t.rds"
saveRDS(
  object = exon_anno_t2t_formatted,
  file = anno_name,
  compress = "xz"
)
fs::file_delete(temp_path)
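# NOTE: the two lines below are web-page boilerplate captured together with
# this script; they are not R code and are kept only as comments.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.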