R/PlatypusDB_list_projects.R

Defines functions PlatypusDB_list_projects

Documented in PlatypusDB_list_projects

#'Metadata download by project for PlatypusDB
#'
#'@description Lists metadata tables of available projects on PlatypusDB
#' @param keyword Character. Keyword by which to search project ids (First Author, Year) in the database. Defaults to an empty string ("") which will list all projects currently available
#' @return A list of metadata tables by project. List element names correspond to project ids to use in the PlatypusDB_fetch function
#' @export
#' @examples
#' \dontrun{
#'
#' #Get list of all available projects and metadata.
#' PlatypusDB_projects <- PlatypusDB_list_projects()
#'
#' #Names of list are project ids to use in PlatypusDB_fetch function
#' names(PlatypusDB_projects)
#' #Common format: first author, date, letter a-z (all lowercase)
#'
#' #View metadata of a specific project
#' print(PlatypusDB_projects[["Kuhn2021a"]])
#'
#' }
#'
PlatypusDB_list_projects <- function(keyword){

  platypus_url_lookup <- NULL

  if(missing(keyword)) keyword <- ""

  #Get the lookup table
  tryCatch({
    load(url("https://storage.googleapis.com/platypusdb_lookup/platypus_url_lookup.RData"))
    #FOR DEV
    #platypusdb_lookup <- new_lookup

    }, error=function(e){
    message(paste0("Failed to load lookup table. Please verify internet connection \n ", e))})

  platypusdb_lookup <- platypus_url_lookup #Reassignment

  platypusdb_meta <- subset(platypusdb_lookup, stringr::str_detect(platypusdb_lookup$filetype, "metadata"))

  if(keyword != ""){
    platypusdb_meta <- subset(platypusdb_meta, stringr::str_detect(platypusdb_meta$name, keyword))
    print(paste0("Found ", nrow(platypusdb_meta), " project ids containing keyword"))
    if(nrow(platypusdb_meta) == 0){
      stop("Please retry with a different keyword or provide an empty string to the keyword argument to list all projects")
    }
  }

  out.list <- list()
  for(i in 1:nrow(platypusdb_meta)){
    tryCatch({

      curr_download_name <- gsub("\\.RData","",platypusdb_meta$name[i]) #have a name ready to use for objects in the r enviroment without the .RData extension

      load(url(platypusdb_meta$url[i]), envir = .GlobalEnv) #download and load to global enviroment

        out.list[[i]] <- get(curr_download_name) #add the just loaded object into a list
        names(out.list)[i] <- platypusdb_meta$project_id[i]
        rm(list = ls(pattern = curr_download_name, envir = .GlobalEnv), envir = .GlobalEnv)

    }, error=function(e){
      message(paste0("Failed to load",  platypusdb_meta$url[i], " \n" , e))})
  }

  #out list reformatting
  form.list <- list()
  for(i in 1:length(out.list)){

    out.list[[i]] <- out.list[[i]][is.na(out.list[[i]][,2]) == F,]

    form.list[[i]] <- list("dataset_info" = as.data.frame(out.list[[i]][1:2,c(1:(min(which(stringr::str_detect(out.list[[i]][1,], "Free.item")))-1))]),
                           "sample_info" = as.data.frame(out.list[[i]][c(6:nrow(out.list[[i]])),c(1:(min(which(stringr::str_detect(out.list[[i]][6,], "Free.item")))-1))])
                    )

    #setting first rows as titles
    names(form.list[[i]][[1]]) <- form.list[[i]][[1]][1,]
    form.list[[i]][[1]] <- form.list[[i]][[1]][-1,]
    names(form.list[[i]][[2]]) <- form.list[[i]][[2]][1,]
    form.list[[i]][[2]] <- form.list[[i]][[2]][-1,]

    names(form.list)[i] <- names(out.list)[i]
  }
    return(form.list) #return loaded files
  }

Try the Platypus package in your browser

Any scripts or data that you put into this service are public.

Platypus documentation built on Aug. 15, 2022, 9:08 a.m.