library(metageneInputs) library(ENCODExplorer)
The goals of this vignette are:
designs.RData
object (which contains designs_encode
and
designs_validation
.All the files produced will be save inside a temporary directory and will need to be moved manually:
outdir <- paste0("/tmp/", basename(tempfile(""))) dir.create(outdir) outdir
The main part of the analysis consists in producing the metagene objects for the ENCODE dataset for the GM12878 cell line. In order to do so, we must first fetch the names of the targets which have bam files available in ENCODE:
cell_line <- "GM12878" targets <- get_encode_targets(cell_line) targets
We need to remove the histones:
i <- !grepl("H[0-9]K", targets) targets <- targets[i] targets
We need to remove the control:
i <- targets != "Control" & targets != "mouse" & targets != "rabbit" targets <- targets[i] targets
We produce a design for each target:
# We use the log hack to avoid all the outpus from the queryEncode function log <- capture.output({ designs_encode <- lapply(targets, get_encode_design, cell_line) }) names(designs_encode) <- targets head(designs_encode)
The validation of the results will be performed on the "Variation and genetic control of chromatin architecture in humans" dataset.
Load and clean the descriptions:
library(tidyr) library(dplyr) file_descriptions <- system.file("extdata/E-MTAB-3657.sdrf.txt", package = "metageneInputs") descriptions <- read.table(file_descriptions, header = TRUE, sep = "\t", stringsAsFactors = FALSE) to_keep <- c("Source.Name", "Comment..Derived.ArrayExpress.FTP.file.") descriptions <- descriptions[,colnames(descriptions) %in% to_keep] colnames(descriptions) <- c("individual_target", "url") descriptions <- tbl_df(descriptions) %>% separate(individual_target, into = c("individual", "target"), sep = "_") %>% filter(target %in% c("PU.1", "RPB2"))
Create the design:
design_validation <- data.frame(bam_files = basename(descriptions$url)) i <- descriptions[["target"]] == "PU.1" j <- descriptions[["target"]] == "RPB2" design_validation[["PU.1"]] <- 0 design_validation[["RPB2"]] <- 0 design_validation[["PU.1"]][i] <- 1 design_validation[["RPB2"]][j] <- 1 design_validation
Save the results:
outfile <- paste0(outdir, "/designs.RData") save(designs_encode, design_validation, file = outfile)
We will now produce the Makefile to download the bam files.
outfile <- paste0(outdir, "/Makefile") fileConn<-file(outfile, open = "w") writeLines("# Targets", fileConn) close(fileConn) fileConn<-file(outfile, open = "a") writeLines("## ENCODE's files", fileConn) bam_files <- unique(as.character(unlist(lapply(designs_encode, `[[`, "bam_files")))) writeLines(paste0("TARGETS_ENCODE+=", bam_files), fileConn) writeLines("", fileConn) writeLines("## Validation's files", fileConn) bam_files <- as.character(design_validation[["bam_files"]]) writeLines(paste0("TARGETS_VALIDATION+=", bam_files), fileConn) writeLines("", fileConn) writeLines("# URLs", fileConn) writeLines("URL_ENCODE=https://www.encodeproject.org/files", fileConn) writeLines("URL_VALIDATION=ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-3657", fileConn) writeLines("", fileConn) writeLines("# Phony targets", fileConn) writeLines(".PHONY: all", fileConn) writeLines("all: $(TARGETS_ENCODE) $(TARGETS_VALIDATION)", fileConn) writeLines("", fileConn) writeLines("# Recipes", fileConn) writeLines("ENCFF%.bam:", fileConn) writeLines("\twget $(URL_ENCODE)/ENCFF$*/@@download/$@", fileConn) writeLines("", fileConn) writeLines("E-MTAB%.bam:", fileConn) writeLines("\twget $(URL_VALIDATION)/$@", fileConn) close(fileConn)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.