knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
library(GwasDataImport)
# library(devtools)
# load_all()
# ieugwasr::select_api("dev2")

# Authorise
ieugwasr::get_access_token()

# This is a small file, good for testing
ebi_id <- "GCST005522"
x <- EbiDataset$new(
  ebi_id = ebi_id,
  ftp_path = get_ftp_path(ebi_id),
  igd_id = "ebi-a-EBITEST",
  wd = paste0("uploads/", ebi_id)
)
x$pipeline()
Now delete:
x$api_gwasdata_delete()
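To confirm the deletion, look the ID up through the API (a quick check using ieugwasr, which the pipeline already relies on; after deletion this should return no record):

ieugwasr::gwasinfo("ebi-a-EBITEST")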
ebi_id <- "GCST005522" x <- EbiDataset$new( ebi_id = ebi_id, ftp_path = get_ftp_path(ebi_id), igd_id = "ebi-a-EBITEST", wd = paste0("uploads/", ebi_id) ) x$download_dataset() x$format_ebi_dataset() x$determine_columns(params=list(chr_col=1, snp_col=2, pos_col=3, oa_col=4, ea_col=5, eaf_col=6, beta_col=7, se_col=8, pval_col=9), gwas_file=x$gwas_out1) x$format_dataset(gwas_file=x$gwas_out1) x$organise_metadata() x$api_metadata_upload() x$api_gwasdata_upload() x$api_gwasdata_delete()
To get a list of datasets that need to be imported:
library(dplyr)
ignore_list <- scan(
  system.file(package = "GwasDataImport", "extdata/ebi_ignore_list.txt"),
  character()
)
newdats <- determine_new_datasets(blacklist = ignore_list)
newdats <- being_processed(newdats) %>% subset(., need)
ignore <- list()
for(i in 2:nrow(newdats))
{
  message(newdats$ebi_id[i])
  x <- EbiDataset$new(
    ebi_id = newdats$ebi_id[i],
    ftp_path = newdats$path[i],
    wd = paste0("uploads/", newdats$ebi_id[i])
  )
  o <- x$pipeline()
  if(! "NULL" %in% class(o))
  {
    ignore[[newdats$ebi_id[i]]] <- o
  }
  rm(x)
}
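Any dataset for which pipeline() returns a non-NULL object failed and is collected in `ignore`. A minimal sketch for recording those IDs, assuming the ignore list is maintained as a plain text file in the package source (a local path here, not the installed copy):

# Append the failed study IDs to a local copy of the ignore list
write(names(ignore), file = "ebi_ignore_list.txt", append = TRUE)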
x <- EbiDataset$new("GCST000755", ftp_path=get_ftp_path("GCST000755")) x$download_dataset() x$format_dataset()
Example: importing the Suhre et al pQTL datasets. Their FTP paths encode probe IDs, which are used to construct the prot-c IGD identifiers and to merge in trait annotations from the probe annotation file:

library(tidyr)
ignore_list <- scan(
  system.file(package = "GwasDataImport", "extdata/ebi_ignore_list.txt"),
  character()
)
newdats <- determine_new_datasets(blacklist = ignore_list, exclude = FALSE)
suhre <- subset(newdats, grepl("Suhr", path))
suhre <- separate(suhre, path, sep = "/", into = c("p1", "p2", "p3"), remove = FALSE)
temp <- do.call(rbind, strsplit(suhre$p3, split = "_"))
suhre$igd_id <- paste0("prot-c-", temp[, 1], "_", temp[, 2])
suhre$igd_id
traitinfo <- data.table::fread("http://metabolomics.helmholtz-muenchen.de/pgwas/download/probeanno.tsv", sep = "\t")
traitinfo$igd_id <- paste0("prot-c-", traitinfo$seqid)
suhre <- merge(suhre, traitinfo, by = "igd_id")
save(suhre, file = "suhre.rdata")

load("suhre.rdata")
for(i in 1:nrow(suhre))
{
  x <- EbiDataset$new(
    ebi_id = suhre$ebi_id[i],
    ftp_path = suhre$path[i],
    igd_id = suhre$igd_id[i],
    traitname = suhre$target[i]
  )
  x$download_dataset()
  x$pipeline()
}
Conclusion: these datasets seem to be missing standard errors, so they will need to be handled through the batch processing instead.
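A quick way to verify this for a single downloaded file (a sketch: "harmonised.tsv" is a hypothetical local path, and standard_error is the column name used in the EBI harmonised format):

# Proportion of missing standard errors in a downloaded summary stats file
dat <- data.table::fread("harmonised.tsv")
mean(is.na(dat$standard_error))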
We also need a way to store which studies to skip: many EBI datasets don't have sufficient information to be included, so they should be recorded in an ignore list rather than re-attempted every time we look for new studies.
On the ieu-ssd machine, where the original set of EBI datasets was downloaded and processed, the first ignore list was determined as follows:
library(dplyr)
a <- read.csv("/data/ebi_gwascatalog/pmids.txt")
b <- scan("/data/ebi_gwascatalog/study_ids.txt", character())
got <- ieugwasr::gwasinfo() %>%
  subset(., grepl("ebi-a", id)) %>%
  {.$id} %>%
  gsub("ebi-a-", "", .)
a$got <- a$StudyID %in% got
head(a)
table(a$got)
table(a$got, a$Status)
ebi_ignore_list <- a$StudyID[a$Status == "harmonised" & !a$got]
write.table(ebi_ignore_list, file = "ebi_ignore_list.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
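The resulting ebi_ignore_list.txt is then placed in the package's extdata directory, which is where the system.file() calls in the earlier examples read it from.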