R/get_chromosome_list.R

Defines functions get_chromosome_list

Documented in get_chromosome_list

#' List the chromosomes that have paired pgen and pvar files
#'
#' A tools4ukbb function. Looks in `directory` for files whose names contain
#' a chromosome label between two underscores (for example
#' `S2_chr2_date.pvar` or `S2_c2_date.pvar`) and reports every chromosome
#' (1-22, X, Y) for which both a `.pgen` file and a `.pvar` file with the
#' same base name are present.
#'
#' The chromosome label MUST sit between two underscores in the file name;
#' a name such as `S2_chr2.pvar` is not recognised.
#'
#' @param pattern The prefix used to label chromosomes in the file names,
#'   e.g. `pattern = "chr"` for files like `sample3_chr2_date.pvar`, or
#'   `pattern = "c"` for files like `s1_c2_date.pvar`. The value is pasted
#'   into a regular expression as-is.
#' @param directory The directory with your pvar, pgen, and psam files.
#' @return A character vector of the detected chromosome labels (`pattern`
#'   followed by the chromosome name, e.g. `"chr1"`, `"chrX"`), ordered
#'   1-22, X, Y. It is empty when no pgen/pvar pair is found. The detected
#'   chromosome names are also printed.
#' @keywords get_all_chromosomes
#' @export
#' @examples
#' \dontrun{
#' get_chromosome_list(pattern = "chr", directory = "path/to/plink_files")
#' }
get_chromosome_list <- function(pattern, directory) {
  chromosomes <- c(as.character(1:22), "X", "Y")
  chr_labels <- paste0(pattern, chromosomes)
  # The surrounding underscores keep "_chr1_" from also matching "_chr10_".
  chromosome_expr <- paste0("_", chr_labels, "_")

  # Base names of the files in `directory` that END in ".<extension>".
  # The dot is escaped and the match is anchored, so names that merely
  # contain the text somewhere (e.g. "x.pgen.tmp") are not picked up and
  # only the real extension is stripped.
  stems_with_extension <- function(extension) {
    ext_regex <- paste0("\\.", extension, "$")
    sub(ext_regex, "", list.files(directory, pattern = ext_regex))
  }

  # A chromosome only counts when the same base name has both file types.
  paired_stems <- intersect(
    stems_with_extension("pgen"),
    stems_with_extension("pvar")
  )

  has_match <- vapply(
    chromosome_expr,
    function(expr) any(grepl(expr, paired_stems)),
    logical(1),
    USE.NAMES = FALSE
  )

  print("I have detected the following chromosomes in your data:")
  print(chromosomes[has_match])

  # Return the labels as they were built instead of stripping every
  # underscore from the search expressions, which would also alter a
  # `pattern` that itself contains an underscore.
  chr_labels[has_match]
}
Lab-Jaiswal/tools4ukbb documentation built on May 12, 2022, 9:11 a.m.