knitr::opts_chunk$set(collapse=TRUE, comment = "#>", fig.width=9, fig.height=6, eval=TRUE, echo=FALSE, results="hide", warning=FALSE)

GSE14315

gse_three = readRDS("~/projects/study_gpl570/study_GSE14315_trscr_gs.rds")

#separer les infos de la colonne sample_title
gse_three$exp_grp$cell_line = stringr::str_split_fixed(gse_three$exp_grp$sample_title, "-", 2)[,1]
library(stringr)
gse_three$exp_grp$treatment_5aza = sapply(stringr::str_split_fixed(gse_three$exp_grp$sample_title, "-", 2)[,2],function(it){
  if (str_detect(it, "un")){
    return("ctl")
  }  else if (str_detect(it, "aza")){
    return("ttt")
  } else {
    stop(paste0("Enable to parse value: ", it))
  }

})

#verification treatment bien attribué + info pertinente (deux lignées hors lung (breast and colon))
compar = data.frame(sample_title=gse_three$exp_grp$sample_title, ttt=gse_three$exp_grp$treatment_5aza, cell=gse_three$exp_grp$cell_line, topology=gse_three$exp_grp$topology)
(compar = compar[order(compar$cell),])

#tableau de completude du design, selection des echantillons représenté dans les 3 traitements
tmp_table=table(gse_three$exp_grp$cell_line, gse_three$exp_grp$treatment_5aza)
tmp_table
as.matrix(tmp_table)
foo <-apply(tmp_table,1,function(l){
  print(l)
  all(l==c(3,3))
})
foo
idx_cell_line=names(foo)[foo]
gse_three$exp_grp=gse_three$exp_grp[gse_three$exp_grp$cell_line%in%idx_cell_line,]
table(gse_three$exp_grp$cell_line, gse_three$exp_grp$treatment_5aza)


#2 groupes de traitements
gse_three$exp_grp = gse_three$exp_grp[order(gse_three$exp_grp$treatment_5aza, gse_three$exp_grp$cell_line),]
gse_three$exp_grp[, c("treatment_5aza","cell_line")]
idx_ctl=rownames(gse_three$exp_grp)[gse_three$exp_grp$treatment_5aza%in%"ctl"]
idx_ttt=rownames(gse_three$exp_grp)[gse_three$exp_grp$treatment_5aza%in%"ttt"]


# #t test apparié group control et group high
# pval = c()
# beta = c()
# for (i in 1:nrow(s$data)){
#   x <- s$data[i,idx_1ct]
#   y <- s$data[i,idx_3hi]
#   beta=c(beta,mean(y-x))
#   pval=c(pval, t.test(x,y, paired = TRUE)$p.value)
# }
# pval
# beta
# plot(beta, -log10(pval))

# #meme chose avec apply
# stats = apply(s$data, 1, function(l){
#   x <- l[idx_1ct]
#   y <- l[idx_3hi]
#   beta = mean(y-x)
#   pval = t.test(x,y, paired = TRUE)$p.value
#   ret = c(beta=beta, pval=pval)
#   return(ret)
# })


# #comparaison graph pval/padj(avec BH)
# stats=t(stats)
# stats = data.frame(stats)
# stats$padj = p.adjust(stats$pval, method = "BH")
# layout(matrix(1:2,1), respect = TRUE)
# plot(stats$beta,-log10(stats$pval), pch=".")
# plot(stats$beta,-log10(stats$padj), pch=".")

# # la même chose avec lm
# stats3 = apply(s$data, 1, function(l){
#   #l = s$data[1,]
#   d$expr = l[c(idx_1ct, idx_3hi)]
#   m = lm(expr~treatment_5aza+cell_line, d)
#   beta = m$coefficients[[2]]
#   pval = anova(m)[1,5]
#   ret = c(beta=beta, pval=pval)
#   return(ret)
# })
# stats3 = t(stats3)
# stats3 = data.frame(stats3)
# stats3$padj = p.adjust(stats3$pval, method = "BH")
# layout(matrix(1:2,1), respect = TRUE)
# plot(stats3$beta,-log10(stats3$pval), pch=".")
# abline(h=-log10(0.05), v=c(-1,1), lty=2,col=2)
# plot(stats3$beta,-log10(stats3$padj), pch=".")
# abline(h=-log10(0.05), v=c(-1,1), lty=2,col=2)

# plot(stats$beta, stats3$beta)
# plot(-log10(stats$pval), -log10(stats3$pval))


# memoisation

compute_anadiff = function(s, idx_ctl, idx_ttt) {
  d = data.frame(treatment_5aza=s$exp_grp[c(idx_ctl,idx_ttt), "treatment_5aza"], cell_line=s$exp_grp[c(idx_ctl,idx_ttt), "cell_line" ])
  stats3 = epimedtools::monitored_apply(s$data, 1, function(l){
    #l = s$data[1,]
    d$expr = l[c(idx_ctl, idx_ttt)]
    m = lm(expr~treatment_5aza+cell_line, d)
    beta = m$coefficients[[2]]
    pval = anova(m)[1,5]
    ret = c(beta=beta, pval=pval)
    return(ret)
  })
  return(stats3)  
}

# stats3 = compute_anadiff(s, idx_1ct, idx_3hi)

if (!exists("mcompute_anadiff")) {mcompute_anadiff = memoise::memoise(compute_anadiff)}

stats3 = mcompute_anadiff(gse_three, idx_ctl, idx_ttt)


stats3 = t(stats3)
stats3 = data.frame(stats3)
stats3$padj = p.adjust(stats3$pval, method = "BH")

# layout(matrix(1:2,1), respect = TRUE)
# plot(stats3$beta,-log10(stats3$pval), pch=".")
# abline(h=-log10(0.05), v=c(-1,1), lty=2,col=2)
# plot(stats3$beta,-log10(stats3$padj), pch=".")
# abline(h=-log10(0.05), v=c(-1,1), lty=2,col=2)

#plot(stats$beta, stats3$beta)
#plot(-log10(stats$pval), -log10(stats3$pval))
gse = "GSE14315"
model = "5-aza  vs. DMSO (reference)"
feats[,paste0("l2fc_", gse)] = NA
feats[,paste0("pval_", gse)] = NA
feats[,paste0("l2fc_", gse)] = stats3[rownames(feats),]$beta
feats[,paste0("pval_", gse)] = stats3[rownames(feats),]$padj


fchuffar/dmethr documentation built on July 2, 2024, 1:17 a.m.