In vjcitn/ivygapSE: A SummarizedExperiment for Ivy-GAP data

Overview

The ivygapSE package includes molecular, imaging, and limited clinical data on glioblastoma (GBM) patients.

Expression data (RNA-seq) was developed in a complex design involving tissue blocks and subblocks and a selection process. See documents at https://vjcitn.github.io/ivygapSE for more details.

To work with the available survival data in an elementary way, the following steps can be taken.

Basic setup

library(ivygapSE)
library(MASS)
library(nlme)
library(DT)
data(ivySE)
# In this code we drop out the "CT"-only samples
struc = as.character(colData(ivySE)$structure_acronym)
spls = strsplit(struc, "-")
basis = vapply(spls, function(x) x[1], character(1))
specific = which(basis!= "CT")
iseSP = ivySE[ , specific]
useful = c("EIF4E3","ATP1A2","FGFR3","LMO3","NCAN","FXYD1","PSD2","PRODH",
           "HIF3A","HRSP12","KCNN3","PPM1K","KCNJ10","ADCYAP1R1","BMP7",
           "KAT2B","CNTN1","SAMD9L","SLC7A11","ECHDC2","FAM181B","SALL2","SASH1",

           "CCL4","CCL3","EGR3","IL1B","CCL2","GPR56","TNF","CX3CR1","SERPINE1","CSF1",
           "RAMP1","IL6ST","IL1A","PDE3B","CD276","FLT1","CXCL12","NR4A1","CSF1R","CCR1",
           "RHOB","SPHK1","SORL1","IL6R","VASH1","ADAM28","STAMBPL1","ADAMTSL2")
splim = iseSP[useful,]
cd = colData(splim)
stac = cd$structure_acronym
stac1 = sapply(strsplit(stac, "-"), "[[", 1)
table(stac1)
eexp = as.numeric(log(assay(splim["EIF4E3",])+1))
newdf = data.frame(eexp=eexp, id=splim$tumor_id, bl=splim$block_name)
# it would be nice to generalize so that LE and IT and other types of
# tissue classification could be modeled easily

Enhance `newdf` so that it has binary representations of key sample types (CThbv, CTmvp, etc.)

We are now going to build up newdf so that it has binary outcomes for different sample "types", i.e., leading edge, microvascular proliferation, etc.

newdf$isCTH = ifelse(stac1 == "CThbv", 1, 0)
table(newdf$isCTH)

newdf$isLE = ifelse(stac1=="LE", 1, 0)
table(newdf$isLE)

newdf$isCTMVP = ifelse(stac1=="CTmvp", 1, 0)  # microvascular proliferation
table(newdf$isCTMVP)

newdf$isCTPAN = ifelse(stac1=="CTpan", 1, 0)  # pseudopalisading around necrosis
table(newdf$isCTPAN)

newdf$isCTPNZ = ifelse(stac1=="CTpnz", 1, 0)  #  Perinecrotic zone (CTpnz)
table(newdf$isCTPNZ))

We can do a few more like above, pnz and IT.

Define a mixed effect logistic regression tool

fit_mixed_logistic = function( SE = splim, type = "isLE", gene, dataframe, verbose=FALSE ) {
  stopifnot(all(c("id", "bl", type) %in% names(dataframe)))
  stopifnot(gene %in% rownames(SE))
  dataframe[[gene]] = as.numeric(log(assay(SE[gene,])+1))
  fmla = as.formula(paste(type, "~", gene))
  glmmPQL(fmla, data=dataframe, fam=binomial, random=~1|id/bl, verbose=verbose)
  }

Sample type prediction using mixed effect logistic regression

Leading edge

LE_models = lapply(useful, function(x) fit_mixed_logistic(SE=splim, type="isLE", gene=x, dataframe=newdf) )
names(LE_models) = useful

coefs_LE= lapply(LE_models, function(x) summary(x)$tTable)


newtab_LE = data.frame(gene = names(coefs_LE),
                    effect = round(sapply(coefs_LE, function(x) x[2, "Value"]),3),
                    tstat = round(sapply(coefs_LE, function(x)x[2, "t-value"]), 3))


datatable(newtab_LE)

Hyperplastic blood vessels

CTH_models = lapply(useful, function(x) fit_mixed_logistic(SE=splim, type="isCTH", gene=x, dataframe=newdf) )
names(CTH_models) = useful

coefs_CTH= lapply(CTH_models, function(x) summary(x)$tTable)

newtab_CTH = data.frame(gene = names(coefs_CTH),
                    effect = round(sapply(coefs_CTH, function(x) x[2, "Value"]),3),
                    tstat = round(sapply(coefs_CTH, function(x)x[2, "t-value"]), 3))
datatable(newtab_CTH)

Perinecrotic zone

PNZ_models = lapply(useful, function(x) fit_mixed_logistic(SE=splim, type="isCTPNZ", gene=x, dataframe=newdf) )
names(PNZ_models) = useful
coefs_PNZ = lapply(PNZ_models, function(x) summary(x)$tTable)
newtab_PNZ = data.frame(gene = names(coefs_PNZ),
                    effect = round(sapply(coefs_PNZ, function(x) x[2, "Value"]),3),
                    tstat = round(sapply(coefs_PNZ, function(x)x[2, "t-value"]), 3))
datatable(newtab_PNZ)

PAN: pseudopalisading around necrosis -- yvonne

Enhance the model to use pairs of genes

Now let's try pairs of genes.

allpairs = combn(useful,2)
allpairs.list = lapply(1:ncol(allpairs), function(x) allpairs[,x])

fit_mixed_logistic_CT_two_genes = function( SE = splim, gene1, gene2, dataframe, verbose=FALSE ) {
  stopifnot(all(c("id", "bl", "isCTH") %in% names(dataframe)))  # so far just using CTH, but need to generalize FIXME
   stopifnot(gene1 %in% rownames(SE))
   stopifnot(gene2 %in% rownames(SE))
   dataframe[[gene1]] = as.numeric(log(assay(SE[gene1,])+1))
   dataframe[[gene2]] = as.numeric(log(assay(SE[gene2,])+1))
   fmla = as.formula(paste("isCTH", "~", gene1, "+", gene2))
   glmmPQL(fmla, data=dataframe, fam=binomial, random=~1|id/bl, verbose=verbose)
}

ff_CT_two_genes = lapply(allpairs.list[1:5], function(x) fit_mixed_logistic_CT_two_genes(SE=splim, 
                    gene1=x[1], gene2=x[2], dataframe=newdf) )
ff_CT_two_genes

coefs= lapply(ff_CT_two_genes, function(x) summary(x)$tTable)
coefs

gns_pair = lapply(coefs, function(x) rownames(x)[2:3])
gns_pair

gns = lapply(coefs, function(x) data.frame(genes=rownames(x)[2:3], effs=x[2:3, "Value"], tstats=x[2:3, "t-value"]))
gns