# R/create_df_different_alleles.R

library(tidyverse)
# Read strain ids input ----------
# The columns used below are `Systematic gene name` and `Allele Gene name`.
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")

# Collect allele names per gene ----------
# Build a data frame with one row per distinct systematic gene name
# (`orf_name`) and a list-column `different.alleles` holding every allele
# name found in the rows of `strain_ids` for that gene, in input row order.
allele.frame <- data.frame(
  orf_name = unique(strain_ids$`Systematic gene name`)
)

# lapply() yields exactly one list element per gene. Unlike a loop over
# `1:nrow(allele.frame)`, it produces an empty list for an empty input
# (where `1:0` would iterate over c(1, 0)), and it avoids re-growing the
# result with c() on every iteration.
# which() discards NA comparisons, so a missing gene name gets an empty
# allele vector, the same result dplyr::filter() gives for an NA condition.
# Allele names are not de-duplicated: repeated names within a gene are kept.
different.alleles <- lapply(
  allele.frame$orf_name,
  function(orf) {
    gene_rows <- which(strain_ids$`Systematic gene name` == orf)
    strain_ids$`Allele Gene name`[gene_rows]
  }
)
allele.frame$different.alleles <- different.alleles

# Save result ----------
saveRDS(allele.frame, "analysis/data/derived_data/df_different_alleles.rds")

#t <- nest(strain_ids,`Allele Gene name`)
#multiple <- t %>% filter(map(t$data,nrow)>1) %>% unnest %>% select(`Allele Gene name`)
#single <- t %>% filter(map(t$data,nrow)==1) %>% unnest %>% select(`Allele Gene name`)

#t %>% filter(`Systematic gene name`=='YDL147W') %>%unnest() %>% nrow
#t %>% filter(map(t$data,nrow)>1) %>% select(data) %>% pull() %>% map(sample_n,1) %>% unlist
# oacar/pgsNetwork documentation built on Oct. 1, 2019, 9:15 a.m.