# inst/script/pathwaysTXT.R

# Download raw data from https://boonelab.ccbr.utoronto.ca/supplement/costanzo2016/
# Data file S5 (sheet 3)
library(openxlsx)
library(tibble)
library(biomaRt)
library(dplyr)

# Use biomaRt to get gene members per SAFE term
#pathwayFile <- system.file("extdata", "Data_File_S5_SAFE_analysis_Gene_cluster_identity_and_functional_enrichments.xlsx", package = "fedup")
#pathway <- read.xlsx(pathwayFile, sheet = 3)

# Query Ensembl for gene symbols annotated to SAFE terms
#ensembl <- useMart("ensembl", dataset = "scerevisiae_gene_ensembl")
#ensembl_gene <- getBM(
#    attributes = c("go_id", "ensembl_gene_id", "external_gene_name"),
#    mart = ensembl
#)
#colnames(ensembl_gene) <- c("Enriched.GO.IDs", "ORF.ID", "Gene.ID")
#pathway <- left_join(pathway, ensembl_gene, by = "Enriched.GO.IDs")
#write.table(pathway, file.path("inst", "extdata", "SAFE_terms.txt"), quote = FALSE, sep = "\t")

# Locate the raw data file annotated with gene symbols (SAFE_terms.txt, the
# file written by the commented-out biomaRt step above) inside the fedup
# package.
# mustWork = TRUE makes a missing file fail right here with a clear error;
# without it system.file() returns "" and the problem only surfaces later
# inside readPathways().
pathwayFile <- system.file(
    "extdata", "SAFE_terms.txt",
    package = "fedup",
    mustWork = TRUE
)

# Read the pathways from the annotated table, using the "Enriched.GO.names"
# column as pathCol and the "Gene.ID" column as geneCol.
# NOTE(review): readPathways() is not attached by any library() call in this
# script -- presumably it comes from fedup itself (e.g. after
# devtools::load_all()); confirm before running.
pathwaysTXT <- readPathways(
    pathwayFile,
    header = TRUE,
    pathCol = "Enriched.GO.names",
    geneCol = "Gene.ID"
)

# Transliterate the pathway names with the "latin-ascii" transform so they
# no longer contain accented Latin characters.
names(pathwaysTXT) <- stringi::stri_trans_general(names(pathwaysTXT), "latin-ascii")

# Store pathwaysTXT as package data (xz compression, serialization version 2),
# overwriting any existing copy.
usethis::use_data(pathwaysTXT, compress = "xz", version = 2, overwrite = TRUE)
# rosscm/FEDUP documentation built on July 15, 2021, 2:18 a.m.