# Set knitr's `root.dir` option to the path returned by here::here(''), so
# chunks are evaluated from that directory (presumably the project root as
# located by the here package) rather than from the document's own folder.
knitr::opts_knit$set(root.dir = here::here(''))

Clean up

# Delete everything listed directly under <project>/data so the data set is
# rebuilt from scratch. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
file.remove(dir(here::here('data'), full.names = TRUE))

# Empty the global environment, hidden (dot-prefixed) objects included.
# `all.names` is written in full instead of relying on partial matching
# of `all`.
rm(list = ls(envir = globalenv(), all.names = TRUE))

Process raw data

# Build the run-level metadata table of one sequencing project.
#
# Three files under data-raw/meta/ are read, all with the column spec
# returned by libzhuoer::cols_char() (presumably "every column as
# character" -- confirm in libzhuoer):
#   <accession>-RunInfo.csv  -> experiment, sample, run, len
#   <accession>.tsv          -> run, layout, species, fastq_ftp
#   <accession>-result.csv   -> experiment, title, Instrument
# The first two are full-joined on `run`, and the result is full-joined
# with the third on `experiment`.
#
# accession: project accession used as the file-name prefix,
#            e.g. accession = 'PRJNA196404'
# returns:   the joined table without `experiment` and with `fastq_ftp`
#            moved to the last column
read_meta <- function(accession) {
    # run-level information from the RunInfo export
    runinfo_raw <- readr::read_csv(
        paste0('data-raw/meta/', accession, '-RunInfo.csv'),
        col_types = libzhuoer::cols_char()
    )
    runinfo <- dplyr::select(
        runinfo_raw,
        'experiment' = 'Experiment', 'sample' = 'SampleName', 'run' = 'Run', 'len' = 'avgLength'
    )

    # per-run layout, species and download links from the .tsv summary
    summary_raw <- readr::read_tsv(
        paste0('data-raw/meta/', accession, '.tsv'),
        col_types = libzhuoer::cols_char()
    )
    run_summary <- dplyr::select(
        summary_raw,
        'run' = 'run_accession', 'layout' = 'library_layout', 'species' = 'scientific_name', 'fastq_ftp'
    )

    by_run <- dplyr::full_join(runinfo, run_summary, by = 'run')

    # experiment-level title and instrument from the result export
    experiment_raw <- readr::read_csv(
        paste0('data-raw/meta/', accession, '-result.csv'),
        col_types = libzhuoer::cols_char()
    )
    experiments <- dplyr::select(
        experiment_raw,
        'experiment' = 'Experiment Accession', 'title' = 'Experiment Title', 'Instrument'
    )

    combined <- dplyr::full_join(by_run, experiments, by = 'experiment')

    # drop the join key; removing and re-adding `fastq_ftp` moves it last
    dplyr::select(combined, -'experiment', -'fastq_ftp', 'fastq_ftp')
}


# Load the PRJNA196404 metadata and display every row.
PRJNA196404 <- read_meta('PRJNA196404')
print(PRJNA196404, n = Inf)

# List the run accessions of each species, one per line.
writeLines(dplyr::filter(PRJNA196404, species == 'Saccharomyces cerevisiae')$run)
writeLines(dplyr::filter(PRJNA196404, species == 'Homo sapiens')$run)

# Show which experiment title belongs to which run.
print(dplyr::select(PRJNA196404, run, title), n = Inf)

# Fail if any run in this project is not single-end.
assertthat::assert_that(
    all(PRJNA196404$layout == 'SINGLE'),
    msg = 'detect pair-end'
)




# Load the PRJNA128665 metadata and display it with the default print length.
PRJNA128665 <- read_meta('PRJNA128665')
print(PRJNA128665)

Afterward

devtools::test()       # test the new data
roxygen2::roxygenize() # you may also have edited data documentation

# Reinstall the package from the current directory. system() returns the
# command's exit status, so stop on a non-zero value: reloading after a failed
# install would silently keep the old data in the session.
if (system('R CMD INSTALL --no-multiarch --with-keep.source .') != 0L) {
    stop('R CMD INSTALL failed; package was not reloaded', call. = FALSE)
}
devtools::reload()     # now you can use the new data in current R session


dongzhuoer/shapebase documentation built on Jan. 9, 2020, 12:08 p.m.