# Default knitr chunk option for this document: echo = TRUE (show chunk source in the output)
knitr::opts_chunk$set(echo = TRUE)
This script reads and converts data downloaded from the Allen Institute Cell Types Database (http://celltypes.brain-map.org/rnaseq) into a format suitable for use as comparison data for human fronto-insula.
# Set the working directory; the relative "data/..." input paths and the output
# directories used below are resolved against it.
# NOTE(review): machine-specific absolute path -- edit before running elsewhere.
setwd("C:/Users/jeremym/Desktop/VEN_TEST")
data
. data
# Attach the packages used by this script, silencing their startup messages.
suppressPackageStartupMessages({
  library(VENcelltypes) # NEED THIS (also ships the clusterInfoMTG data file)
  library(feather)
  library(matrixStats)
  library(dplyr)
  library(edgeR)
  library(data.table)
})
# Keep strings as character vectors (not factors) in data frames read below.
options(stringsAsFactors = FALSE)
First we need to read the data into R.
# Human MTG exon and intron matrices; the first column of each CSV becomes
# the row names of the resulting matrix.
exons <- as.matrix(
  fread("data/human_MTG_2018-06-14_exon-matrix.csv"),
  rownames = 1
)
introns <- as.matrix(
  fread("data/human_MTG_2018-06-14_intron-matrix.csv"),
  rownames = 1
)

# Gene (row) and sample (column) metadata, each indexed by its first column.
geneInfo <- read.csv(
  "data/human_MTG_2018-06-14_genes-rows.csv",
  row.names = 1
)
sampInfo <- read.csv(
  "data/human_MTG_2018-06-14_samples-columns.csv",
  row.names = 1
)
Second, convert the meta-data files into formats consistent with the rest of the analysis. Note that the MTG cluster colors (and other info) are stored as a data file in `VENcelltypes`.
# Flag the cells to keep: everything whose cluster is not "no class"
kp <- sampInfo$cluster != "no class"

# Build the annotation table from the retained samples only
anno <- auto_annotate(sampInfo[kp, ])
anno$sample_id <- anno$sample_name

# Overwrite cluster colors and ids with the values stored in clusterInfoMTG,
# looking up each cell's cluster label once and reusing the index
data(clusterInfoMTG)
cluster_row <- match(anno$cluster_label, clusterInfoMTG$cluster_label)
anno$cluster_color <- clusterInfoMTG$cluster_color[cluster_row]
anno$cluster_id <- clusterInfoMTG$cluster_id[cluster_row]
Next, convert the data into CPM(exons+introns) and format appropriately. For this data set we also precalculate the medians for convenience.
## Calculate CPM from the summed intron and exon matrices
CPM <- cpm(introns + exons)
rownames(CPM) <- rownames(geneInfo)
colnames(CPM) <- sampInfo$sample_id
# Omit cells from outlier clusters as above; drop = FALSE keeps CPM a matrix
# even if only one cell is retained
CPM <- CPM[, kp, drop = FALSE]

## Format appropriately: one row per sample, one column per gene, plus the id
data <- as.data.frame(t(CPM))
data$sample_id <- anno$sample_id

## Calculate cluster medians in MTG data for comparison with FI
cl <- anno$cluster_id
names(cl) <- anno$sample_id
# For each cluster id, take the per-gene median over that cluster's cells.
# drop = FALSE matters here: without it a cluster with a single cell collapses
# CPM[, x] to a plain vector and rowMedians() fails on non-matrix input.
medianExpr <- do.call(
  "cbind",
  tapply(names(cl), cl, function(x) rowMedians(CPM[, x, drop = FALSE]))
)
medianExpr <- as.data.frame(medianExpr)
medianExpr$gene <- rownames(geneInfo)
Finally, output the results to feather files in the `MTG` directory.
# Create the MTG output directory unless it already exists, so that re-running
# the script does not raise an "already exists" warning
if (!dir.exists("MTG")) {
  dir.create("MTG")
}

# Write annotation file
write_feather(anno, "MTG/anno.feather")

# Write medians file
write_feather(medianExpr, "MTG/medians.feather")

# Write data file
write_feather(data, "MTG/data.feather")
First we need to read the data into R.
# Mouse VISp exon and intron matrices; the first column of each CSV becomes
# the row names of the resulting matrix.
exons <- as.matrix(
  fread("data/mouse_VISp_2018-06-14_exon-matrix.csv"),
  rownames = 1
)
introns <- as.matrix(
  fread("data/mouse_VISp_2018-06-14_intron-matrix.csv"),
  rownames = 1
)

# Gene (row) and sample (column) metadata, each indexed by its first column.
geneInfo <- read.csv(
  "data/mouse_VISp_2018-06-14_genes-rows.csv",
  row.names = 1
)
sampInfo <- read.csv(
  "data/mouse_VISp_2018-06-14_samples-columns.csv",
  row.names = 1
)
Second, convert the meta-data files into formats consistent with the rest of the analysis.
# Replace missing sample metadata values with 0 before building annotations
sampInfo[is.na(sampInfo)] <- 0

# Build the annotation table and expose the sample name as sample_id
anno <- auto_annotate(sampInfo)
anno$sample_id <- anno$sample_name
Next, convert the data into CPM(exons+introns) and format appropriately.
## CPM of the summed intron and exon matrices, labelled by gene and sample
CPM <- cpm(introns + exons)
rownames(CPM) <- rownames(geneInfo)
colnames(CPM) <- anno$sample_id

## Samples-by-genes data frame with the sample id appended as a column
data <- as.data.frame(t(CPM))
data$sample_id <- anno$sample_id
Finally, output the results to feather files in the `VISp` directory.
# Create the VISp output directory unless it already exists, so that
# re-running the script does not raise an "already exists" warning
if (!dir.exists("VISp")) {
  dir.create("VISp")
}

# Write annotation file
write_feather(anno, "VISp/anno.feather")

# Write data file
write_feather(data, "VISp/data.feather")
First we need to read the data into R. This step is slow.
# Mouse ALM exon and intron matrices; the first column of each CSV becomes
# the row names of the resulting matrix.
exons <- as.matrix(
  fread("data/mouse_ALM_2018-06-14_exon-matrix.csv"),
  rownames = 1
)
introns <- as.matrix(
  fread("data/mouse_ALM_2018-06-14_intron-matrix.csv"),
  rownames = 1
)

# Gene (row) and sample (column) metadata, each indexed by its first column.
geneInfo <- read.csv(
  "data/mouse_ALM_2018-06-14_genes-rows.csv",
  row.names = 1
)
sampInfo <- read.csv(
  "data/mouse_ALM_2018-06-14_samples-columns.csv",
  row.names = 1
)
Second, convert the meta-data files into formats consistent with the rest of the analysis.
# Replace missing sample metadata values with 0 before building annotations
sampInfo[is.na(sampInfo)] <- 0

# Build the annotation table and expose the sample name as sample_id
anno <- auto_annotate(sampInfo)
anno$sample_id <- anno$sample_name
Next, convert the data into CPM(exons+introns) and format appropriately.
## CPM of the summed intron and exon matrices, labelled by gene and sample
CPM <- cpm(introns + exons)
rownames(CPM) <- rownames(geneInfo)
colnames(CPM) <- anno$sample_id

## Samples-by-genes data frame with the sample id appended as a column
data <- as.data.frame(t(CPM))
data$sample_id <- anno$sample_id
Finally, output the results to feather files in the `ALM` directory.
# Create the ALM output directory unless it already exists, so that
# re-running the script does not raise an "already exists" warning
if (!dir.exists("ALM")) {
  dir.create("ALM")
}

# Write annotation file
write_feather(anno, "ALM/anno.feather")

# Write data file
write_feather(data, "ALM/data.feather")
Output session information.
# Print the R version and attached/loaded packages for reproducibility
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.