tt <- genefilter::rowttests(X, fac = factor(cl), tstatOnly = TRUE)
tt2 <- rowTstat(X, cl)
tt$statistic - tt2
system.time(rowTstatNull(expmat = X, classes = cl, nperm = 1000))
library(Category)
system.time(ttperm(X, fac = factor(cl), B = 1000))
A <- read_matrix('/Volumes/HCM/Human_LCM/UCSC_hg19/Counts_TPM/All-TPM-UCSChg19-Symbol.txt')
tt <- tpm(counts = A)
A <- A[,sample(1:ncol(A), 30)]
a1 <- basic_rank_norm(counts = A)
a2 <- basicRankNorm(A)
dim(a2)
boxplot(a1)
boxplot(a2)
diag(cor(a1, a2))
At <- tpm(counts = A, log = FALSE)
A1 <- t(t(apply(At, 2, rank, na.last="keep"))/(colSums(!is.na(A))+1))
A2 <- t(apply(A1, 1, rank, na.last="keep"))/(rowSums(!is.na(A1))+1)
summary(A2[,1])
# function to change gene names
load("data/geneInfo.RData")
counts <- readr::read_tsv("../UCSC_hg19/Counts_TPM/AllCounts-UCSC-hg19-knownGene.txt")
test <- any2symbol(as.character(counts$EntrezID))
geneids <- any2entrez(gene_ids = test)
tt <- c('weird', 'weirder', 'weirdest')
idx <- Position(function(i) any(test %in% i), geneInfo)
if(is.na(idx)) stop("Sorry, could not find any of the provided gene ids among the gene symbol
and Ensembl reference, respectively!")
entrez <- geneInfo[["ENTREZID"]]
names(entrez) <- geneInfo[[idx]]
trez[test])
sum(test %in% names(entrez))
data("geneInfo")
sum(test %in% geneInfo$SYMBOL)
sum(is.na(geneids))
geneids <- letters[1:25]
# GSEA --------------------------------------------------------------------
library(binilib)
reg <- readRDS("../Analyses/1_CUMC_LCM/Networks/Epi/ARACNE-AP/UCSChg19-Epi-TPMraw-ARACNEap-TF.rds")
gsigs <- read_matrix('../Analyses/External_Data_Sets/IPMN_Precursor/Log2FC-Matrix_Hiraoka.tsv')
gsig <- gsigs[,1]
head(gsig)
library(fgsea)
toi <- reg[['AEBP1']]
gs <- gsea_regulon(gsig, regulon = toi)
plot(gs)
tt <- viper::aREA(gsig, regulon = reg)$nes
tt[which.min(tt)]
head(tt)
ps <- names(toi$tfmode)[toi$tfmode >= 0]
ng <- names(toi$tfmode)[toi$tfmode < 0]
fgsea(pathways = list(PLAG1pos = ps), stats = signature, nperm = 10000)
tt <- gsea1T(signature, gS = ps)
tt$ES
tt <- gsea_null(tt, perm =10000)
tt$pval
plot(tt)
tt <- gsea_regulon(signature, toi)
tt <- gsea_null(tt)
plot(tt, plotSignature = TRUE, signatureNames = c('Precursor', 'PDA'),
signatureType = 'Z-score')
str(tt)
tfm <- toi[['tfmode']]
pos_targ <- names(tfm)[tfm>=0]
neg_targ <- names(tfm)[tfm<0]
position_pos <- match(pos_targ, names(sort(signature, decreasing = TRUE))) # 1st decreasing for pos. targets
position_neg <- match(neg_targ, names(sort(signature, decreasing = FALSE))) # 2nd increasing for neg. targets
names(position_pos) <- pos_targ
names(position_neg) <- neg_targ
rlistr <- rank(signature)
rlistr <- rlistr[!(names(signature) %in% names(tfm))] # ranks of non-hits
rlistr <- sort(c(rlistr, position_pos, position_neg))
rlist <- signature[match(names(rlistr), names(signature))]
x <- which(names(rlist) %in% names(tfm))
nr <- sum(abs(rlist[x])^1)
nh <- length(rlist)-length(x)
es <- rep(-(1/nh),length(rlist))
es[x] <- abs(rlist[x])^1/nr
rs <- cumsum(es)
max(rs) + min(rs)
null <- gsea1T_null(signature, pos_targ)
sum(null$null_es > max(rs))
max(rs)/null$pos_null_es
plot(res)
tt <- gsea_null(res)
tt$NES_neg
ps <-
res <- gsea1T(signature, pos_targ)
class(res)
res <- gsea_null(res)
res$ES
res$nes
res
pdf('test.pdf', width = 5, height = 4, useDingbats = FALSE)
plot(res, plotSignature = TRUE, main = 'KLF5 targets',
signatureNames = c('DMSO', 'PTC596'), signatureType = 'Z-score')
dev.off()
res <- gsea1T(ptc596_signature, names(ps), sorting = 'increasing')
pdf('test.pdf', width = 6, height = 4, useDingbats = FALSE)
plot(res, plotSignature = TRUE, main = 'Positive KLF5 targets',
signatureNames = c('DMSO', 'PTC596'),
signatureType = 'Z-score')
dev.off()
data("homologyInfo")
class(homologyInfo$Entrez)
class(names(homologyInfo$Entrez))
nms <- names(homologyInfo$Entrez)
human <- as.character(homologyInfo$Entrez)
names(human) <- nms
head(human)
class(nms)
class(human)
homologyInfo$Entrez <- as.character(homologyInfo$Entrez)
iris %>% mutate(bb = sqrt(Sepal.Length))
iris %<>% mutate(bb = sqrt(Sepal.Length))
library(binilib)
marinaLH <- readxl::read_excel('../AG_Rad/Data/Human-Signatures.xlsx', sheet = 3)
set1 <- readRDS('set1.rds')
set2 <- readRDS('set2.rds')
lhsig <- marinaLH$nes
names(lhsig) <- marinaLH$gene
gs_lc <- gsea2T(lhsig, set1, set2)
plot(gs_lc, legend = FALSE,
plotSignature = TRUE,
signatureNames = c('High-grade','Low-grade'),
signatureType = 'Z-score')
xs <- rnorm(800)
s1 <- gl(n = 4, k = 200, length = 800, labels = paste('very long factor name', sample(letters[1:26], size = 4)))
s2 <- gl(n = 2, k = 100, length = 800, labels = LETTERS[25:26])
xs <- do.call('c', lapply(1:8, function(i) rnorm(n = 100, mean = i)))
split_violin(x = xs, s1 = s1, s2 = s2, rug = TRUE, rotate_xlabels = 60, main = 'Whats up')
# Gene sets ---------------------------------------------------------------
library(binilib)
fs <- list.files('~/Downloads/msigdb/')
nms <- str_extract(fs, '^\\w\\d{0,1}\\.\\w{2,3}')
path <- '~/Downloads/msigdb/'
l <- lapply(seq(length(fs)), function(i){
hm <- strsplit(readLines(paste0(path, fs[i])), split = "\t")
nm <- purrr::map_chr(hm, 1)
hm <- purrr::map(hm, ~ .[3:length(.)])
names(hm) <- nm
hm
})
names(l) <- nms
msigdb <- l
save(msigdb, file = 'data/msigdb.RData')
# Mouse
data("homologyTable")
l2 <- lapply(l, function(i){
lapply(i, function(j){
tmp <- tibble(human = j) %>% inner_join(homologyTable$symbol, by = 'human')
unique(return(tmp$mouse)) # in case the same mouse gene is returned multiple times
})
})
# Check that names are still the same
identical(names(l), names(l2))
identical(unlist(lapply(l, function(i) names(i)), use.names = FALSE), unlist(lapply(l2, function(i) names(i)), use.names = FALSE))
# check lengths
hu <- unlist(lapply(l, function(i){
vapply(i, function(j) length(j), FUN.VALUE = numeric(1))
}), use.names = FALSE)
mo <- unlist(lapply(l2, function(i){
vapply(i, function(j) length(j), FUN.VALUE = numeric(1))
}), use.names = FALSE)
plot(hu, mo, pch = 16, cex = 0.7) # looks good ; numbers are quite different per se
summary(hu - mo) # 2 genes median difference
which(hu - mo == 357)
hu[145] - mo[145]
# check hallmark
l2$h.all$HALLMARK_TNFA_SIGNALING_VIA_NFKB
l$h.all$HALLMARK_TNFA_SIGNALING_VIA_NFKB
msigdbMouse <- l2
save(msigdbMouse, file = 'data/msigdbMouse.RData')
# Build hallmark founder set information ----------------------------------
fs <- list.files('~/Desktop/ADVOCATE_NewPaper/ADVOCATE-Paper/External_cohorts/Screening-Pathways/Hallmark_Founder_GeneSets/')
load("~/Desktop/ADVOCATE_NewPaper/ADVOCATE-Paper/External_cohorts/Screening-Pathways/Hallmark-Founder-GeneSets-Symbol.rda")
l <- map(hallmark_founder, ~ names(.))
hallmark_founder <- l
save(hallmark_founder, file = 'data/hallmarkFounder.RData')
sigs <- readRDS('../Laise-PDA-Activity/Data/bulkSubtypes-SignatureMatrix.rds')
sigs[1:5, 1:3]
tt <- specific_n(sigs)
# fgseaResults --------------------------------------------------------------------------------
library(DESeq2)
library(binilib)
dset <- readRDS('/home/carlo/Documents/Analyses/AG_Keller/Habringer_Stefan/Richard_2020-06-24/Data/Lewis-Eset-Prefilter-Counts.rds')
dset <- dset[,dset$Tissue == 'lymphoma']
dds <- DESeqDataSetFromMatrix(apply(exprs(dset), 2, round), pData(dset), ~ Genotype)
dds <- DESeq(dds)
resultsNames(dds)
res <- results(object = dds, contrast = c('Genotype', 'EMU_TCL1_WHIM', 'EMU_TCL1'), tidy = TRUE) %>%
filter(!is.na(padj)) %>%
tbl_df() %>% arrange(desc(stat))
res
gsig <- p2z(res$pvalue, res$stat)
names(gsig) <- res$row
fres_hm <- fgsea::fgsea(pathways = msigdbMouse$h.all,
stats = gsig, nperm = 10000)[order(NES, decreasing = TRUE), ]
fres_hm <- subset(fres_hm, padj < 0.1)
gs <- msigdbMouse$h.all[fres_hm$pathway]
tmp <- lapply(gs, function(i){
gsea1T(signature = gsig, gS = i)
})
!any(names(gs) %in% fres_hm$pathway)
absrange <- cut(abs(gsig), breaks = quantile(abs(gsig)), include.lowest = TRUE, labels = FALSE)
length(absrange)
head(absrange)
table(absrange)
cols <- t(apply(col2rgb(c('firebrick2', 'royalblue')), 1, function(i){
seq(i[1], i[2], length.out = length(gsig))
}))
dim(cols)
cols <- rbind(cols, als)
tmp <- cols[,1]
tmp
rgb(238.0000, 44, 44, maxColorValue = 255, alpha = 244.5688)
cols[1,]
als[1]
plot_fgseaRes(ges = gsig,
geneSets = msigdbMouse$h.all[fres_hm$pathway],
signatureNames = c('Emu TCL1 WHIM', 'Emu TCL1'),
fgseaRes = fres_hm)
sigmat <- binilib::read_matrix('../Analyses/External_Data_Sets/Somerville_CellRep_2018/Data/log2FCMatrix-Comparisons.tsv')
reg <- readRDS('../Analyses/1_CUMC_LCM/Salmon_Gencode_hg38/Networks_Salmon/Epi/CUMC-Salmonhg38-DEtransform-Epi-TFPlusCoTF-AnimalTFDb.rds')
vpres <- viper::aREA(sigmat, reg)$nes
fdrs <- apply(vpres, 2, function(i) p.adjust(p = pnorm(abs(i), lower.tail = FALSE)*2, 'fdr'))
fdrs['TP63',]
plot_OneReg_MultSig(sigmat = sigmat,
tf = 'TP63', regulon = reg)
tt <- abs(seq(-255, 255, length.out = 15e3))
# Keller Lab Favorite Pathways ----------------------------------------------------------------
vst <- getVarianceStabilizedData(dds)
boxplot(split(vst['Dmtn', ], f = dds$Genotype))
reg <- readRDS('../../KellerLab_Pathways/KellerLab-FavoritePathway-Regulons.rds')
reg <- reg[-7]
vpres <- lapply(reg, function(i){
viper(vst, i, method = 'ttest')
})
vpres <- do.call(rbind, vpres)
dd <- model.matrix(~ dds$Genotype)
library(limma)
colnames(dd) <- levels(dds$Genotype)
fit <- eBayes(lmFit(vpres, dd))
res <- topTable(fit, coef = 'EMU_TCL1_WHIM',
number = nrow(fit),
genelist = rownames(fit)) %>%
tbl_df() %>% arrange(desc(t))
res
boxplot(split(vpres['HALLMARK_WNT_BETA_CATENIN_SIGNALING', ], f = dds$Genotype))
# Import --------------------------------------------------------------------------------------
dset <- readRDS('../Analyses/AG_Reichert/Data/RNAseq_Organoids_LogTransform-22584x54.rds')
colaps <- gsub('_\\d$', '', dset$sample)
vmat <- t(apply(Biobase::exprs(dset), 1, function(i) tapply(i, colaps, mean)))
purIST(emat = vmat, raw = TRUE)
# DEtransform -------------------------------------------------------------
gg <- readRDS('../Analyses/External_Data_Sets/Cell_line_expression/Klijn_Cells/Data/140625_Klijn_counts_coding.rds')
gg <- readRDS('../Analyses/Califano_Lab/Counts-Califano-NET.rds')
tt <- readRDS('../Analyses/External_Data_Sets/ICGC-Canada/EGA_Downloads/RNA_Seq/Counts_TPM/ICGC-PACA-CA-EsetCounts.rds')
exprs(gg)[1:5, 1:5]
mode(exprs(gg)[,1])
emat <- gg@assayData$exprs
vv <- DEtransform(emat = emat)
dds <- DESeq2::DESeqDataSetFromMatrix(countData = emat,
colData = data.frame(condition = rep(1, ncol(emat)),
row.names = colnames(emat)),
design = ~ 1)
dds
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.