#' @title Gets the number of phenotypic variables in the study
#'
#' @param phs dbGap study ID (phs00xxxx, or 00xxxx, or xxx)
#'
#' @return An integer: the total number of `variable` entries found across
#'   the study's phenotype data dictionaries (`*.data_dict.xml`), excluding
#'   the Subject, Sample, Sample_Attributes and Pedigree dictionaries.
#'   Returns 0 when no phenotype directory or no matching data dictionary
#'   is found.
#'
#' @description This function extracts information from the data_dict.xml
#'   files on the dbGaP FTP server to get the study characteristics. Works
#'   only for a parent study.
#' @import RCurl
#'
#' @author Gregoire Versmee, Laura Versmee
#' @export
n.variables <- function(phs) {
  # NOTE(review): phs.version() is defined elsewhere; the URL below assumes
  # it returns a dotted identifier whose first component is the study
  # accession (e.g. "phs000007.v30.p11") -- confirm against that helper.
  phs <- phs.version(phs)
  study_dir <- unlist(strsplit(phs, "\\."))[1]
  url <- paste0(
    "ftp://anonymous:anonymous@ftp.ncbi.nlm.nih.gov/dbgap/studies/",
    study_dir, "/", phs, "/"
  )

  # Split on "\r?\n" so a CRLF-terminated FTP listing does not leave a
  # trailing "\r" on every entry.
  filenames <- strsplit(
    RCurl::getURL(url, ftp.use.epsv = TRUE, dirlistonly = TRUE),
    "\r?\n"
  )[[1]]

  pheno_dirs <- filenames[grep("pheno", filenames, fixed = TRUE)]
  if (length(pheno_dirs) == 0L) {
    # Without a phenotype directory there is nothing to count.
    return(0L)
  }
  # Use a single directory: a vector here would produce a vector of URLs
  # and break the per-file downloads below.
  phenodir <- paste0(url, pheno_dirs[1], "/")

  filelist <- strsplit(
    RCurl::getURL(phenodir, ftp.use.epsv = FALSE, dirlistonly = TRUE),
    "\r?\n"
  )[[1]]

  # Keep the data dictionaries, dropping the administrative ones. Patterns
  # are matched as literal substrings (fixed = TRUE), so "." is not a
  # regex wildcard.
  excluded <- c(
    "Sample_Attributes.data_dict.xml",
    "Subject.data_dict.xml",
    "Sample.data_dict.xml",
    "Pedigree.data_dict.xml"
  )
  is_dict <- grepl(".data_dict.xml", filelist, fixed = TRUE)
  is_excluded <- Reduce(
    `|`,
    lapply(excluded, grepl, x = filelist, fixed = TRUE),
    rep(FALSE, length(filelist))
  )
  dict_files <- filelist[is_dict & !is_excluded]

  # mclapply() relies on forking: it refuses mc.cores > 1 on Windows, and
  # detectCores() may return NA, so fall back to a single core there.
  n_cores <- getOption("mc.cores", parallel::detectCores())
  if (.Platform$OS.type == "windows" || is.na(n_cores) || n_cores < 1L) {
    n_cores <- 1L
  }

  counts <- parallel::mclapply(dict_files, function(e) {
    xmllist <- XML::xmlToList(RCurl::getURLContent(paste0(phenodir, e)))
    sum(names(xmllist) == "variable")
  }, mc.cores = n_cores)

  # A failed worker comes back as a "try-error" object rather than raising;
  # surface it with a clear message instead of an obscure sum() failure.
  failed <- vapply(counts, inherits, logical(1), what = "try-error")
  if (any(failed)) {
    stop(
      "Failed to read data dictionary: ",
      paste(dict_files[failed], collapse = ", "),
      call. = FALSE
    )
  }

  # sum() over an empty result is 0, whereas Reduce(sum, list()) is NULL.
  sum(unlist(counts))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.