# Set the knitr chunk option `echo` to TRUE as the default for all chunks.
knitr::opts_chunk$set(echo = TRUE)

Make a small version of the Arabidopsis genome file for testing functions.

This includes only the first 100,000 bases of chromosome 1.

library(tidyverse)

# Download the gzipped TAIR10 top-level genome FASTA into data-raw/.
# mode = "wb" requests a binary transfer explicitly: the archive is a binary
# file, and a text-mode transfer on Windows rewrites line endings and would
# corrupt it.
download.file(
  url = "ftp://ftp.ensemblgenomes.org/pub/release-40/plants/fasta/arabidopsis_thaliana/dna/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa.gz",
  destfile = here::here("data-raw/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa.gz"),
  mode = "wb"
)

# Decompress the downloaded archive to a plain FASTA file in data-raw/.
# overwrite = TRUE lets the script be re-run: with the default
# (overwrite = FALSE) gunzip() stops with an error when the destination file
# left by an earlier run already exists.
R.utils::gunzip(
  filename = here::here("data-raw/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa.gz"),
  destname = here::here("data-raw/Arabidopsis_thaliana.TAIR10.dna.toplevel.fasta"),
  overwrite = TRUE
)

# Load the decompressed genome FASTA from data-raw/ with ape
arabidopsis_genome <-
  here::here("data-raw/Arabidopsis_thaliana.TAIR10.dna.toplevel.fasta") %>%
  ape::read.FASTA()

# Shorten each sequence name to the text before its first space
names(arabidopsis_genome) <- map_chr(
  str_split(names(arabidopsis_genome), " "),
  1
)

# Keep only the first 100,000 nucleotides of chromosome 1.
# Single-bracket subsetting keeps the sequence named "1" inside its container
# instead of extracting the bare sequence.
arabidopsis_genome_small <- arabidopsis_genome["1"]
# head() stops at the end of the sequence; indexing with 1:100000 would read
# past the end of a sequence shorter than 100,000 bases and pad the result.
arabidopsis_genome_small[["1"]] <-
  utils::head(arabidopsis_genome_small[["1"]], n = 100000L)

# Save the truncated chromosome 1 sequence as a FASTA file under inst/extdata
ape::write.FASTA(
  x = arabidopsis_genome_small,
  file = here::here("inst/extdata/Arabidopsis_thaliana_TAIR10_40_small.fasta")
)

# Print the R version and the attached/loaded packages used for this run.
sessionInfo()


joelnitta/baitfindR documentation built on May 7, 2020, 6:21 p.m.