# Global knitr chunk options for this vignette: collapse source and output
# into a single block and prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
Load libraries for data import and processing.
# Attach the packages used throughout this script. Each call sits on its own
# line: several calls on one line without a separator do not parse in R.
library(rcrispr)
library(tidyverse)
library(readxl)
Set the location where the data will be stored. NOTE: this path will need to be updated depending on where you want to prepare and store the data.
# Directory that receives every downloaded and generated file in this script.
# Edit this path to suit your own machine before running.
dirpath <- "~/cancer/Rpackages/rcrispr/inst/extdata"
The following library and datasets are detailed in:
Colic, M., Wang, G., Zimmermann, M. et al.
Identifying chemogenetic interactions from CRISPR screens with drugZ.
Genome Med 11, 52 (2019).
https://doi.org/10.1186/s13073-019-0665-3

Zimmermann, M., Murina, O., Reijns, M.A.M. et al.
CRISPR screens identify genomic ribonucleotides as a source of PARP-trapping lesions.
Nature 559, 285–289 (2018).
https://doi.org/10.1038/s41586-018-0291-z
This will download a gzipped TSV of the base Toronto KnockOut (TKO) CRISPR Library v1 (TKOv1) from http://tko.ccbr.utoronto.ca.
# Download TKOv1 ----
# Fetch the gzipped library into `dirpath`. mode = "wb" forces a binary
# transfer so the archive is byte-exact on every platform.
TKOv1_gz <- file.path(dirpath, "TKOv1-base-90k-library-91320_sequences.gz")
download.file(
  url = "http://tko.ccbr.utoronto.ca/Data/TKOv1-base-90k-library-91320_sequences.gz",
  destfile = TKOv1_gz,
  mode = "wb"
)

# Read the header-less, tab-separated library into three named columns.
# The connection is passed unopened on purpose: read.delim() then opens it in
# text mode and closes it again, so no handle is left open on the file that is
# deleted below.
TKOv1 <- read.delim(
  gzfile(TKOv1_gz),
  sep = "\t",
  header = FALSE,
  col.names = c("SEQ", "LOCUS", "TARGET")
)

# Annotate guides ----
# Split guide LOCUS to get chromosome, coordinates, gene and strand
# Remove chr, start, end and strand for CTRL guides
# For chr10Promiscuous and chr10Rand set GENE as chr10
# Set sgRNA (ID) as <GENE>_<SEQ>
CTRL_guides <- c("chr10Promiscuous", "chr10Rand", "LacZ", "EGFP", "luciferase")
chr10_ctrl_guides <- c("chr10Promiscuous", "chr10Rand")

TKOv1_ann <- TKOv1 %>%
  # LOCUS is split on "_" into coordinates, gene and strand; surplus pieces
  # are dropped and the original LOCUS column is kept.
  separate(
    col = LOCUS,
    into = c("COORDS", "GENE", "STRAND"),
    sep = "_",
    remove = FALSE,
    extra = "drop"
  ) %>%
  # COORDS is split on ":" or "-" into chromosome, start and end.
  separate(
    col = COORDS,
    into = c("CHR", "START", "END"),
    sep = "[\\:\\-]",
    remove = TRUE,
    extra = "drop"
  ) %>%
  # Control guides carry no genomic position: blank their coordinates.
  mutate(
    CHR = ifelse(TARGET %in% CTRL_guides, NA, CHR),
    START = ifelse(TARGET %in% CTRL_guides, NA, START),
    END = ifelse(TARGET %in% CTRL_guides, NA, END),
    STRAND = ifelse(TARGET %in% CTRL_guides, NA, STRAND)
  ) %>%
  mutate(GENE = ifelse(TARGET %in% chr10_ctrl_guides, "chr10", GENE)) %>%
  unite(sgRNA, GENE, SEQ, sep = "_", remove = FALSE)

# Write annotated TKO library to file
write.table(
  TKOv1_ann,
  file = file.path(dirpath, "TKOv1.tsv"),
  row.names = FALSE,
  sep = "\t",
  quote = FALSE
)

# Remove TKOv1 zipped TSV
file.remove(TKOv1_gz)

# Show first 10 sgRNAs
head(TKOv1_ann, 10)
# Download Colic et al raw counts ----
# The figshare URL has no file extension, so the transfer mode must be set
# explicitly: without mode = "wb" the zip can be fetched in text mode on
# Windows and arrive corrupted.
colic_raw_counts_zip <- file.path(dirpath, "readcounts-drugZ-updated_May2019.zip")
download.file(
  url = "https://ndownloader.figshare.com/files/16170896",
  destfile = colic_raw_counts_zip,
  mode = "wb"
)

# Extract and read in HeLa raw read count matrix straight from the archive
HeLa_read_counts <- read.delim(
  unz(
    colic_raw_counts_zip,
    "readcounts-drugZ-updated_May2019/readcounts-HeLa_ola.txt"
  ),
  stringsAsFactors = FALSE
)

# Write one count file per sample ----
# Columns 3 to 9 of the matrix are treated as the sample columns; each is
# written next to the sgRNA and GENE columns.
# If sample is not T0 (*_T0), compress the count file.
sample_counts_dir <- file.path(dirpath, "HeLa_raw_sample_counts")
# showWarnings = FALSE keeps a re-run quiet when the directory already exists.
dir.create(sample_counts_dir, showWarnings = FALSE)

for (sample_name in colnames(HeLa_read_counts)[3:9]) {
  sample_count_filename <- file.path(
    sample_counts_dir,
    paste0(sample_name, ".tsv")
  )
  sample_counts <- HeLa_read_counts %>%
    select(sgRNA, GENE, !!sample_name)
  write.table(
    sample_counts,
    sample_count_filename,
    sep = "\t",
    row.names = FALSE,
    quote = FALSE
  )
  if (!grepl("_T0", sample_name)) {
    # overwrite = TRUE lets a re-run replace an existing .tsv.gz instead of
    # stopping with an error.
    R.utils::gzip(sample_count_filename, overwrite = TRUE)
  }
}

# Remove raw counts zip file
file.remove(colic_raw_counts_zip)

# Show first 10 sgRNAs
head(HeLa_read_counts, 10)
The c-sar sample metadata consists of a table with one row per sample and requires the following columns:
In addition, there are several optional columns used by c-sar for plotting:
We don't need to worry about the specific names given to the columns as c-sar uses the column index instead of column names.
# Prepare sample mapping from read count column names (samples) ----
# One row per sample column (columns 3 to 9 of the count matrix). The three
# 0/1 flag columns mark, in order: the first sample as plasmid, the next three
# as control and the last three as treatment.
HeLa_sample_mapping <- data.frame(
  sample_name = colnames(HeLa_read_counts)[3:9],
  plasmid = c(1, rep(0, 6)),
  control = c(0, rep(1, 3), rep(0, 3)),
  treatment = c(rep(0, 4), rep(1, 3))
)

# Add filename (<sample_name>.tsv or <sample_name>.tsv.gz)
# Add group ('T0', 'untreated-control', 'olaparib-treated')
# The per-sample count files are gzipped for every sample whose name does not
# contain "_T0", so the same rule decides the ".gz" suffix here. Keying the
# suffix on `control` alone would leave the treatment rows pointing at
# uncompressed files that are not on disk.
HeLa_sample_mapping <- HeLa_sample_mapping %>%
  mutate(
    filename = paste0(sample_name, ".tsv"),
    filename = ifelse(
      grepl("_T0", sample_name),
      filename,
      paste0(filename, ".gz")
    )
  ) %>%
  mutate(
    treatment_group = case_when(
      plasmid == 1 ~ "T0",
      control == 1 ~ "untreated-control",
      treatment == 1 ~ "olaparib-treated",
      TRUE ~ "unknown"
    )
  )

# Write HeLa sample metadata to file
write.table(
  HeLa_sample_mapping,
  file = file.path(dirpath, "HeLa_olaparib_sample_metadata.tsv"),
  row.names = FALSE,
  sep = "\t",
  quote = FALSE
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.