# get database
library(genoppi)
ref = msigdb_c5_table
ref = msigdb_h_table
terms = unique(ref$Set.name)
colnames(ref) <- c('gene','pathway')
ref$significant <- TRUE
# get high impact table
df <- read.table('~/Desktop/210131_ukbb_mfi_vep_all_HIGH_maf_info.txt')
colnames(df) <- c('CHR','BP1','SNPID','RSID','REF','ALT','MAF','MA','INFO','BP2','MINOR','MAJOR','CONS')
df <- df[df$INFO > 0.9,] # setup INFO
df <- df[!is.na(df$BP1), ] # remove some NA's
# get consequence
cons <- as.data.frame(do.call(rbind, strsplit(df$CONS, '\\|')))
colnames(cons) <- c('CSQ','ENST','TRANS','TYPE','IMPACT','GENE','HGNC','AFFECT','YES',"CDS","AA_POS",'EXON','INTRON')
cons$TYPE_SIMPLE <- unlist(lapply(strsplit(cons$TYPE, split = '\\&'), function(x) x[1]))
combined = cbind(df, cons)
###############################
# or done in bash on server ###
# cat ukb_mfi_homo_plof_mrg_chr* | cut -d" " -f11 | sort | uniq
scanned_genes <- scan(what = character())
lof_genes <- data.frame(gene=scanned_genes)
#lof_genes <- data.frame(gene = combined$GENE)
ref <- gtex_rna
ref$lof <- ref$gene %in% lof_genes$gene
# get genes that are specific and LOF
result <- ref[ref$significant == T & ref$lof == T,]
gene_df <- as.data.frame(table(result$tissue))
colnames(gene_df) <- c('tissue','n.genes')
# get all genes that are specific
tissue_table <- table(gtex_rna$tissue[gtex_rna$significant])
tissue_df <- as.data.frame(tissue_table)
colnames(tissue_df) <- c('tissue','n.tissue.specific.genes')
# merge data
mrg = merge(gene_df, tissue_df, by = 'tissue')
mrg$pct = round(mrg$n.genes / mrg$n.tissue.specific.genes,3)*100
mrg = mrg[rev(order(mrg$pct)),]
# write
colnames(mrg) <- c('GTEx Tissue', 'Completely knocked-out tissue-specific genes', 'tissue-specific genes', '%')
write.csv(mrg, '210131_tissue_specific_genes.csv', quote = F,row.names = F)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.