Nothing
GENOME <- "hg38"
ORGANISM <- "Homo sapiens"
ASSEMBLED_MOLECULES <- paste0("chr", c(1:22, "X", "Y", "M"))
CIRC_SEQS <- "chrM"
library(IRanges) # for CharacterList()
library(GenomeInfoDb) # for fetch_chrom_sizes_from_UCSC()
.order_seqlevels <- function(seqlevels)
{
tmp <- CharacterList(strsplit(seqlevels, "_"))
npart <- lengths(tmp)
stopifnot(all(npart <= 3L))
idx1 <- which(npart == 1L)
stopifnot(length(idx1) == length(ASSEMBLED_MOLECULES))
oo1 <- match(ASSEMBLED_MOLECULES, seqlevels[idx1])
stopifnot(!anyNA(oo1))
idx1 <- idx1[oo1]
idx3 <- which(npart == 3L)
m3 <- matrix(unlist(tmp[idx3]), ncol=3L, byrow=TRUE)
m31 <- match(m3[ , 1L], ASSEMBLED_MOLECULES)
stopifnot(!anyNA(m31))
m33 <- match(m3[ , 3L], c("alt", "random", "fix"))
stopifnot(!anyNA(m33))
GenBankAccn_prefixes <- c("GL", # alt-scaffold or unlocalized-scaffold
"JH", # alt-scaffold
"KB", # alt-scaffold
"KI", # alt-scaffold or unlocalized-scaffold
"KN", # fix-patch or novel-patch
"KQ", # fix-patch or novel-patch
"KV", # fix-patch or novel-patch
"KZ") # fix-patch or novel-patch
m32 <- match(substr(m3[ , 2L], 1L, 2L), GenBankAccn_prefixes)
stopifnot(!anyNA(m32))
is_alt_scaffold <- m33 == 1L & m32 <= 4L
is_unlocalized_scaffold <- m33 == 2L & m32 <= 4L
is_fix_patch <- m33 == 3L & m32 >= 5L
is_novel_patch <- m33 == 1L & m32 >= 5L
sequence_role <- integer(length(idx3))
sequence_role[is_alt_scaffold] <- 1L
sequence_role[is_unlocalized_scaffold] <- 2L
sequence_role[is_fix_patch] <- 3L
sequence_role[is_novel_patch] <- 4L
stopifnot(all(sequence_role != 0L))
oo3 <- order(sequence_role, m31, m3[ , 2L])
idx3 <- idx3[oo3]
npatch <- sum(is_fix_patch) + sum(is_novel_patch)
idx3_patch <- tail(idx3, n=npatch)
if (npatch != 0L)
idx3 <- head(idx3, n=-npatch)
idx2 <- which(npart == 2L)
m2 <- matrix(unlist(tmp[idx2]), ncol=2L, byrow=TRUE)
stopifnot(all(m2[ , 1L] == "chrUn"))
oo2 <- order(m2[ , 2L])
idx2 <- idx2[oo2]
c(idx1, idx3, idx2, idx3_patch)
}
GET_CHROM_SIZES <- function(goldenPath.url=getOption("UCSC.goldenPath.url"))
{
chrom_sizes <- GenomeInfoDb:::fetch_chrom_sizes_from_UCSC(GENOME,
goldenPath.url=goldenPath.url)
oo <- .order_seqlevels(chrom_sizes[ , "chrom"])
S4Vectors:::extract_data_frame_rows(chrom_sizes, oo)
}
#NCBI_LINKER <- list(
# assembly_accession="GCF_000001405.26", # GRCh38
# ## The chromInfo table at UCSC contains sequences that belong to
# ## GRCh38.p12 but not to GRCh38. Because we want to map hg38 to
# ## GRCh38 and not to GRCh38.p12, we drop these sequences.
# drop_unmapped=TRUE
#)
NCBI_LINKER <- list(
assembly_accession="GCF_000001405.38" # GRCh38.p12
)
### Sequences not in the original GRCh38 are not mapped to Ensembl!
ENSEMBL_LINKER <- "ucscToEnsembl"
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.