inst/doc/emales.R

## ----eval=FALSE---------------------------------------------------------------
#  library(gggenomes)
#  
#  # parse sequence length and some metadata from fasta file
#  emale_seqs <- read_fai("emales.fna") %>%
#    tidyr::extract(seq_desc, into = c("emale_type", "is_typespecies"), "=(\\S+) \\S+=(\\S+)",
#      remove=F, convert=T) %>%
#    dplyr::arrange(emale_type, length)
#  
#  # plot the genomes - first six only to keep it simple for this example
#  emale_seqs_6 <- emale_seqs[1:6,]
#  p1 <- gggenomes(emale_seqs_6) +
#   geom_seq() + geom_bin_label()
#  p1

## ----eval=FALSE---------------------------------------------------------------
#  emale_genes <- read_gff("emales.gff") %>%
#    dplyr::rename(feature_id=ID) %>%                       # we'll need this later
#    dplyr::mutate(gc_cont=as.numeric(gc_cont))             # per gene GC-content
#  
#  p2 <- gggenomes(emale_seqs_6, emale_genes) +
#    geom_seq() + geom_bin_label() +
#    geom_gene(aes(fill=gc_cont)) +
#    scale_fill_distiller(palette="Spectral")
#  p2

## ----eval=FALSE---------------------------------------------------------------
#  # prefilter hits by minimum length and maximum divergence
#  emale_tirs_paf <- read_paf("emales-tirs.paf") %>%
#    dplyr::filter(seq_id1 == seq_id2 & start1 < start2 & map_length > 99 & de < 0.1)
#  emale_tirs <- bind_rows(
#    dplyr::select(emale_tirs_paf, seq_id=seq_id1, start=start1, end=end1, de),
#    dplyr::select(emale_tirs_paf, seq_id=seq_id2, start=start2, end=end2, de))
#  
#  p3 <- gggenomes(emale_seqs_6, emale_genes, emale_tirs) +
#    geom_seq() + geom_bin_label() +
#    geom_feature(size=5) +
#    geom_gene(aes(fill=gc_cont)) +
#    scale_fill_distiller(palette="Spectral")
#  p3

## ----eval=FALSE---------------------------------------------------------------
#  emale_links <- read_paf("emales.paf")
#  
#  p4 <- gggenomes(emale_seqs_6, emale_genes, emale_tirs, emale_links) +
#    geom_seq() + geom_bin_label() +
#    geom_feature(size=5, data=use_features(features)) +
#    geom_gene(aes(fill=gc_cont)) +
#    geom_link() +
#    scale_fill_distiller(palette="Spectral")
#  
#  p4 <- p4 %>% flip_bins(3:5)
#  p4

## ----eval=FALSE---------------------------------------------------------------
#  emale_gc <- thacklr::read_bed("emales-gc.tsv") %>%
#    dplyr::rename(seq_id=contig_id)
#  
#  p5 <- p4 %>% add_features(emale_gc)
#  p5 <- p5 + geom_ribbon(aes(x=(x+xend)/2, ymax=y+.24, ymin=y+.38-(.4*score),
#      group=seq_id, linetype="GC-content"), use_features(emale_gc),
#                         fill="blue", alpha=.5)
#  p5

## ----eval=FALSE---------------------------------------------------------------
#  emale_cogs <- read_tsv("emales-cogs.tsv", col_names = c("feature_id", "cluster_id", "cluster_n"))
#  emale_cogs %<>% dplyr::mutate(
#    cluster_label = paste0(cluster_id, " (", cluster_n, ")"),
#    cluster_label = fct_lump_min(cluster_label, 5, other_level = "rare"),
#    cluster_label = fct_lump_min(cluster_label, 15, other_level = "medium"),
#    cluster_label = fct_relevel(cluster_label, "rare", after=Inf))
#  emale_cogs
#  
#  
#  p6 <- gggenomes(emale_seqs_6, emale_genes, emale_tirs, emale_links) %>%
#    add_features(emale_gc) %>%
#    add_clusters(genes, emale_cogs) %>%
#    flip_bins(3:5) +
#    geom_seq() + geom_bin_label() +
#    geom_feature(size=5, data=use_features(features)) +
#    geom_gene(aes(fill=cluster_label)) +
#    geom_link() +
#    geom_ribbon(aes(x=(x+xend)/2, ymax=y+.24, ymin=y+.38-(.4*score),
#      group=seq_id, linetype="GC-content"), use_features(emale_gc),
#                         fill="blue", alpha=.5) +
#    scale_fill_brewer("Conserved genes", palette="Set3")
#  
#  p6

## ----eval=FALSE---------------------------------------------------------------
#  emale_blast <- read_blast("emales_mavirus-blastp.tsv")
#  emale_blast %<>%
#    dplyr::filter(evalue < 1e-3) %>%
#    dplyr::select(feature_id=qaccver, start=qstart, end=qend, saccver) %>%
#    dplyr::left_join(read_tsv("mavirus.tsv", col_names = c("saccver", "blast_hit", "blast_desc")))
#  
#  # manual annotations by MFG
#  emale_transposons <- read_gff("emales-manual.gff", types = c("mobile_element"))
#  
#  
#  p7 <- gggenomes(emale_seqs_6, emale_genes, emale_tirs, emale_links) %>%
#    add_features(emale_gc) %>%
#    add_clusters(genes, emale_cogs) %>%
#    add_features(emale_transposons) %>%
#    add_subfeatures(genes, emale_blast, transform="aa2nuc") %>%
#    flip_bins(3:5) +
#    geom_feature(aes(color="integrated transposon"),
#      use_features(emale_transposons), size=7) +
#    geom_seq() + geom_bin_label() +
#    geom_link(offset = c(0.3, 0.2), color="white", alpha=.3) +
#    geom_feature(aes(color="terminal inverted repeat"), use_features(features),
#      size=4) +
#    geom_gene(aes(fill=cluster_label)) +
#    geom_feature(aes(color=blast_desc), use_features(emale_blast), size=2,
#      position="pile") +
#    geom_ribbon(aes(x=(x+xend)/2, ymax=y+.24, ymin=y+.38-(.4*score),
#      group=seq_id, linetype="GC-content"), use_features(emale_gc),
#                         fill="blue", alpha=.5) +
#    scale_fill_brewer("Conserved genes", palette="Set3") +
#    scale_color_viridis_d("Blast hits & Features", direction = -1) +
#    scale_linetype("Graphs") +
#    ggtitle(expression(paste("Endogenous mavirus-like elements of ",
#    italic("C. burkhardae"))))
#  
#  p7

Try the gggenomes package in your browser

Any scripts or data that you put into this service are public.

gggenomes documentation built on Sept. 11, 2024, 8:55 p.m.