#################
## Extract gene expression, compute logRPKM and attach it to the matrix
#################
# 1. get vector of symbols of genes for which a variant was called
# 2. read the list from featurecounts
# 3. compute logRPKM
# 4. Subset only those genes
# 5. associate samples_genes and merge it with teh variants
add_log_rpkm <- function(variants, # this is the dataframe
gene_expression,
sample_names){
# 1. Read in counts obtained with featurecounts and saved as RDS file
counts <- readRDS(gene_expression)
# Only keep counts for samples defined in sample_names
grep_sample_columns <- grep(paste(sample_names,collapse = "|"),colnames(counts$counts))
counts$counts <- counts$count[,grep_sample_columns]
counts1 <- sapply(sample_names,function(name){
reassign_names(name,counts$counts)
} )
colnames(counts$counts)[counts1] <- names(counts1)
# 3. Compute CPM and convert to gene symbols
# gene.length = counts$annotation$Length
rpkmCounts <- edgeR::cpm(counts$counts,log = TRUE)
rpkmCounts <- rpkmCounts[rownames(rpkmCounts) %in% variants$GeneID,]
# 4. Convert from wide to long format
rpkmCounts <- data.frame(rpkmCounts)
rpkmCounts$GeneID <- rownames(rpkmCounts)
rpkmCounts_long <- rpkmCounts %>% tidyr::gather(SampleName,LogRpkm,sample_names)
rpkmCounts_long$SYMBOL <- variants$SYMBOL[match(rpkmCounts_long$GeneID,variants$GeneID)]
# 5. Associate the correct logRPKM to each sample
variant_expr <- merge(variants,rpkmCounts_long,all.x=TRUE)
return(variant_expr)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.