library(tidyverse)
# This script reads the SGA_data_combined data frame, which contains the
# Costanzo 2016 data, and creates a data frame containing the SGD systematic
# gene names and the allele names used in the experiments.
# It also removes suppressor mutation data, since we weren't interested in those.
# Load the combined SGA data frame.
net.df <- readRDS("analysis/data/derived_data/SGA_data_combined.rds.gz")

# Columns 1-2 are taken as the query strain identifiers and columns 3-4 as the
# array strain identifiers. They are selected by position, so this depends on
# the column order of the saved data frame.
# NOTE(review): confirm the column order against the script that builds
# SGA_data_combined.
q.data <- net.df[, c(1, 2)] %>% distinct()
a.data <- net.df[, c(3, 4)] %>% distinct()

# Give both tables the same column names so bind_rows() stacks them
# column-for-column instead of producing four separate columns.
id_cols <- c("Systematic gene name", "Allele Gene name")
colnames(q.data) <- id_cols
colnames(a.data) <- id_cols

# Stack query and array strains, drop rows duplicated across the two sets, and
# remove suppressor-mutation entries (allele names containing "supp").
# grepl() returns FALSE for NA, so rows with a missing allele name are kept.
strain_ids <- bind_rows(q.data, a.data) %>%
  distinct() %>%
  filter(!grepl("supp", `Allele Gene name`))

# Save the strain identifier table.
write_csv(strain_ids, "analysis/data/derived_data/strain_ids.csv")
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.