R/main.R

Defines functions rip

#' Scrape strain pages and load their genomic files into a SQLite database
#'
#' Walks the listing pages under `PAGE_BASE` to collect strain (RSP)
#' identifiers, visits each strain page under `STRAIN_BASE` to find the VCF
#' and the two read-file links, downloads whatever links are present to
#' temporary files and hands them to `Add2DB()` / `Seqs2DB()`.
#'
#' @param dbConn Where to write. `NULL` (default) opens the
#'   `kannapedia-genomes.sqlite` file shipped with the `kannarip` package; a
#'   character string is treated as the path of a SQLite file to open; any
#'   other value is used as an already-open connection and is left open on
#'   return.
#' @param nPages Number of listing pages to walk, starting from page 1.
#' @return A list with one entry per strain page, in the order the strains
#'   were found. Each entry is a list with `RSP`, the `download.file()` status
#'   codes `VCF`, `Read.1`, `Read.2` (`NA` when the page had no such link) and
#'   `Pheno`; an entry is `NULL` when a download or the phenotype extraction
#'   failed for that strain (a warning is raised in that case).
#' @export
rip <- function(dbConn = NULL, nPages = 38) {

  # Resolve the database handle once, up front. Connections opened here are
  # closed on exit; a connection supplied by the caller is never closed.
  opened_here <- FALSE
  if (is.null(dbConn)) {
    db_path <- system.file('kannapedia-genomes.sqlite', package = 'kannarip')
    # system.file() returns '' when the file is missing, and SQLite would
    # silently turn '' into a throw-away temporary database.
    if (!nzchar(db_path))
      stop(
        "'kannapedia-genomes.sqlite' not found in package 'kannarip'",
        call. = FALSE
      )
    dbConn <- dbConnect(RSQLite::SQLite(), db_path)
    opened_here <- TRUE
  } else if (is.character(dbConn)) {
    dbConn <- dbConnect(RSQLite::SQLite(), dbConn)
    opened_here <- TRUE
  }
  if (opened_here)
    on.exit(dbDisconnect(dbConn), add = TRUE)

  # NOTE(review): this callback takes (res, rsp) while the one below takes
  # (rsp, res). Both orders are kept exactly as they were; confirm how
  # pagenav() passes its arguments (by name or by position).
  RSP_IDS <- unlist(pagenav(seq_len(nPages), PAGE_BASE, function(res, rsp) {
    xml_text(xml_find_all(content(res, 'parsed'), '//div[2]/header/em'))
  }))

  FILE_HREFS <- pagenav(RSP_IDS, STRAIN_BASE, function(rsp, res) {
    doc <- content(res, 'parsed')
    links <- '//*[@id="x-section-2"]/div[1]/div[2]/div'
    href_at <- function(xpath) xml_attr(xml_find_first(doc, xpath), 'href')
    list(
      rsp = rsp,
      # The response is kept so the phenotype can be extracted later.
      res = res,
      vcf = href_at(paste0(links, '/strong[2]/em/a')),
      fasta1 = href_at(paste0(links, '/strong[3]/em/a[1]')),
      fasta2 = href_at(paste0(links, '/strong[3]/em/a[2]'))
    )
  })

  # TRUE only for a single, non-missing, non-empty link (xml_attr() yields NA
  # when the node is absent).
  has_href <- function(x) {
    is.character(x) && length(x) == 1L && !is.na(x) && nzchar(x)
  }

  # Download `url` to `dest`. Returns download.file()'s status code (0), or
  # NA when there is no link to follow; a non-zero status is an error.
  fetch <- function(url, dest) {
    if (!has_href(url))
      return(NA_integer_)
    status <- download.file(url, dest, method = 'curl')
    if (status != 0L)
      stop('download failed (status ', status, '): ', url, call. = FALSE)
    status
  }

  lapply(FILE_HREFS, function(fhr) {
    vcf <- tempfile()
    tf1 <- tempfile()
    tf2 <- tempfile()
    # Remove the temporary files however this strain's processing ends.
    on.exit(unlink(c(vcf, tf1, tf2)), add = TRUE)

    pqr <- tryCatch(
      list(
        RSP = fhr[['rsp']],
        VCF = fetch(fhr[['vcf']], vcf),
        Read.1 = fetch(fhr[['fasta1']], tf1),
        Read.2 = fetch(fhr[['fasta2']], tf2),
        Pheno = extractPhenoFromHTML(fhr[['res']])
      ),
      error = function(e) {
        warning(
          'skipping ', fhr[['rsp']], ': ', conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
    if (is.null(pqr))
      return(NULL)

    # The loaders receive the downloaded files, not the status codes.
    # NOTE(review): if Add2DB() is DECIPHER::Add2DB it expects a data.frame,
    # so the VCF may need to be parsed before this call -- confirm.
    # NOTE(review): table names are kept exactly as they were (including the
    # 'Kannapdia_' spelling) so existing databases stay compatible -- confirm
    # whether 'Kannapedia_' was intended.
    if (!is.na(pqr$VCF))
      Add2DB(vcf, dbConn, 'Kannapdia_VCF')
    Add2DB(pqr$Pheno, dbConn, 'Kannapdia_Pheno')
    if (!is.na(pqr$Read.1))
      Seqs2DB(tf1, 'fastq', dbConn, pqr$RSP, 'Kannapedia_R1')
    if (!is.na(pqr$Read.2))
      Seqs2DB(tf2, 'fastq', dbConn, pqr$RSP, 'Kannapedia_R2')

    pqr
  })
}
Indicai-dev/kannarip documentation built on Sept. 2, 2020, 12:36 p.m.