inst/doc/TcGSA_userguide.R

## ----pre, echo=FALSE, warning=FALSE, include=FALSE----------------------------
library(knitr)
is_check <- ("CheckExEnv" %in% search()) || any(c("_R_CHECK_TIMINGS_", "_R_CHECK_LICENSE_") %in% names(Sys.getenv()))
knitr::opts_chunk$set(echo = TRUE, eval=!is_check, cache=TRUE,
					  collapse = TRUE,
					  comment = "#>"
)
#rmarkdown::render("vignettes/TcGSA_userguide.Rmd")

## ----GS_import, message=FALSE-------------------------------------------------
temp <- tempfile()
utils::download.file("http://doi.org/10.1371/journal.pcbi.1004310.s007", destfile = temp, mode = "wb")
load(unz(temp, "ReproducibleRFiles/GMTs_PLOScb.RData", open = "r"))
unlink(temp)
rm(temp)

## ----GEOquery, include=FALSE, message=FALSE, cache=FALSE----------------------
if (!requireNamespace("GEOquery", quietly = TRUE)) {
	if (!requireNamespace("BiocManager", quietly = TRUE)){
    	install.packages("BiocManager")
	}
	BiocManager::install("GEOquery")
}

## ----import_dalia, message=FALSE, results='hide'------------------------------
GEOquery::getGEOSuppFiles('GSE46734', filter_regex="(*NonParamCombat*)|(*DESIGN*)")

## ----design_dalia-------------------------------------------------------------
design <- read.delim(gzfile("GSE46734/GSE46734_DALIA1longitudinalTranscriptome_DESIGN_anonym.txt.gz"))
design_preATI <- design[-which(design$TimePoint<0 | design$TimePoint==16 | design$TimePoint>22), ]
head(design_preATI,5)

## ---- include=FALSE-----------------------------------------------------------
stopifnot(nrow(design_preATI)==90 & ncol(design_preATI)==6)

## ----expr_dalia, cache=TRUE---------------------------------------------------
expr_preATI <- read.delim(gzfile("GSE46734/GSE46734_DALIA1longitudinalTranscriptome_PALO01_PreATI_NEQC_NonParamCombat.txt.gz"))
rownames(expr_preATI) <- expr_preATI$PROBE_ID
expr_preATI <- expr_preATI[,as.character(design_preATI$Sample_name)]

expr_preATI[1:4,1:4]

## ---- include=FALSE-----------------------------------------------------------
stopifnot(nrow(expr_preATI)==32978 & ncol(expr_preATI)==90)

## ----dim_expr_DALIA-----------------------------------------------------------
identical(ncol(expr_preATI), nrow(design_preATI))

## ----LR_ST, message=FALSE, warning=FALSE--------------------------------------
tcgsa_result <- TcGSA::TcGSA.LR(expr = expr_preATI, 
								gmt = gmt_modulesV2, 
								design = design_preATI, 
								subject_name = "Patient_ID", 
								time_name = "TimePoint")

## ----tcgsa_result, echo=FALSE-------------------------------------------------
tcgsa_result

## ----summary_dalia------------------------------------------------------------
summary(tcgsa_result)

## ----signifLRT_ST-------------------------------------------------------------
head(TcGSA::signifLRT.TcGSA(tcgsa_result)$mixedLRTadjRes)

## ----multtest_ST--------------------------------------------------------------
head(TcGSA::multtest.TcGSA(tcgsa_result))

## ----plot1GS_ST, message=FALSE, warning=FALSE, fig.keep='all'-----------------
TcGSA::plot1GS(expr = expr_preATI, 
			   #plot1GS(expr = tcgsa_result$Estimations,
			   gmt = gmt_modulesV2, 
			   Subject_ID = design_preATI$Patient_ID, 
			   TimePoint = design_preATI$TimePoint,
			   clustering = FALSE, 
			   time_unit = "W", 
			   geneset.name = "M3.2", 
			   title="",
			   margins=0.4,
			   lab.cex=0.37,
			   axis.cex=0.37,
			   line.size=0.45,
			   gg.add=list(ggplot2::theme(legend.position="none"),
			   			ggplot2::ylim(-1.26,1.26)
			   ))

## ----import_ober, message=FALSE, warning=FALSE--------------------------------
utils::download.file("ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE30nnn/GSE30101/soft/GSE30101_family.soft.gz", destfile = "GSE30101_family.soft.gz", mode = "wb", cacheOK = FALSE)
gse.soft <- GEOquery::getGEO(filename="GSE30101_family.soft.gz")

## ----expr_ober----------------------------------------------------------------
probesIDs <- GEOquery::Table(GEOquery::GSMList(gse.soft)[[1]])$ID
data.matrix <- do.call('cbind', lapply(GEOquery::GSMList(gse.soft),
									   function(x) {
									   	tab <- GEOquery::Table(x)
									    mymatch <- match(probesIDs,tab$ID_REF)
									    return(tab$VALUE[mymatch])
									   }))
rownames(data.matrix) <- probesIDs
expr.All.ChaussVac <- apply(X = data.matrix, MARGIN = 2, FUN = as.numeric)
rownames(expr.All.ChaussVac) <- probesIDs

## ----design_ober--------------------------------------------------------------
design_list <- lapply(GEOquery::GSMList(gse.soft), 
					  function(x){GEOquery::Meta(x)$characteristics_ch1})
design <- data.frame(row.names = names(design_list))
design$sample_ID <- names(design_list)
s_id <- unlist(lapply(design_list, function(x){gsub("subject id: ", "", x[grep("subject id: ", x)])}))
design$Subject_ID <- as.character(paste("P", s_id[design$sample_ID], sep=""))

time <- unlist(lapply(design_list, function(x){gsub("day: ", "", x[grep("day: ", x)])}))
time[which(time %in% c("-7", "0.5", "1", "7", "10", "14",  "21", "28"))] <-
	paste("D", time[which(time %in% c("-7", "0.5", "1", "7", "10", "14", "21", "28"))], sep="")
time[which(time %in% c("-168", "1.5", "6", "9", "12", "15", "24", "36", "48"))] <-
	paste("H", time[which(time %in% c("-168", "1.5", "6", "9", "12", "15", "24", "36", "48"))], sep="")
design$Time <- as.character(time[design$sample_ID])

vac <- unlist(lapply(design_list, function(x){
	gsub("vaccine: ", "", x[grep("vaccine: ", x)])
}))
vac <- as.factor(vac)
levels(vac) <- c("influenza", "influenza", "influenza", "influenza", "saline", 
				 "pneumo", "pneumo", "pneumo", "saline", "saline")
design$Vaccine <- as.character(vac[design$sample_ID])

sampSet <- unlist(lapply(design_list, function(x){
	gsub("sample set: ", "", x[grep("sample set: ", x)])
}))
design$sampSet <- as.character(sampSet[design$sample_ID])

design$Time[which(design$sampSet=="Training_Set_Vein" & design$Time %in% c("0", "3"))] <-
	paste("D", design$Time[which(design$sampSet=="Training_Set_Vein" & design$Time %in% c("0", "3"))], sep="")
design$Time[which(design$sampSet=="Training_Set_Finger" & design$Time %in% c("0", "3"))] <-
	paste("H", design$Time[which(design$sampSet=="Training_Set_Finger" & design$Time %in% c("0", "3"))], sep="")
design$Time[which(design$sampSet=="Test_Set_Vein" & design$Time %in% c("0", "3"))] <-
	paste("D", design$Time[which(design$sampSet=="Test_Set_Vein" & design$Time %in% c("0", "3"))], sep="")
design$Time[which(design$sampSet=="Test_Set_Finger" & design$Time %in% c("0", "3"))] <-
	paste("D", design$Time[which(design$sampSet=="Test_Set_Finger" & design$Time %in% c("0", "3"))], sep="")
design$Time[which(design$sampSet=="Validation_Vein" & design$Time %in% c("0", "3"))] <-
	paste("D", design$Time[which(design$sampSet=="Validation_Vein" & design$Time %in% c("0", "3"))], sep="")

design$Day <- gsub("D", "", design$Time)
design$Day[grep("H", design$Day)] <- as.numeric(gsub("H", "", design$Day[grep("H", design$Day)]))/24
design$Day <- as.numeric(design$Day)

design.All.ChaussVac <- design



# Avg Baseline -----
design.All.ChaussVac.trainSetVein <- design.All.ChaussVac[which(design.All.ChaussVac$sampSet=="Training_Set_Vein"),]
samplesSaline2rmv <- design.All.ChaussVac.trainSetVein[162:214,"sample_ID"]
design.All.ChaussVac.noDup <- design.All.ChaussVac.trainSetVein[-which(design.All.ChaussVac.trainSetVein$sample_ID%in%samplesSaline2rmv),]

design.All.ChaussVac.AvgBl <- design.All.ChaussVac.noDup[which(design.All.ChaussVac.noDup$Day!=0),]
design.All.ChaussVac.AvgBl[which(design.All.ChaussVac.AvgBl$Day==-7),"Day"] <- 0
design.All.ChaussVac.AvgBl[which(design.All.ChaussVac.AvgBl$Time=="D-7"),"Time"] <- "D0"

expr.All.ChaussVac.AvgBl <- expr.All.ChaussVac[, design.All.ChaussVac.AvgBl$sample_ID]
for(p in unique(design.All.ChaussVac.AvgBl$Subject_ID)){
	if(length(which(design.All.ChaussVac.noDup$Subject_ID==p & (design.All.ChaussVac.noDup$Day==0 | design.All.ChaussVac.noDup$Day==-7)))>1){
		expr.All.ChaussVac.AvgBl[, which(design.All.ChaussVac.AvgBl$Subject_ID==p & design.All.ChaussVac.AvgBl$Day==0)] <-
			apply(X=cbind(expr.All.ChaussVac[, design.All.ChaussVac.noDup[which(design.All.ChaussVac.noDup$Subject_ID==p & design.All.ChaussVac.noDup$Day==0), "sample_ID"]],
						  expr.All.ChaussVac[, design.All.ChaussVac.noDup[which(design.All.ChaussVac.noDup$Subject_ID==p & design.All.ChaussVac.noDup$Day==-7), "sample_ID"]]),
				  MARGIN=1, FUN=mean, na.rm=TRUE)
	}
}
rownames(expr.All.ChaussVac.AvgBl) <- probesIDs

if(!all.equal(as.character(design.All.ChaussVac.AvgBl$sample_ID), colnames(expr.All.ChaussVac.AvgBl))){stop("\n\n\nWARNING: EXPRESSION FILE ORDER NOT MATCHING DESIGN FILE\n\n\n")}
design.All.ChaussVac.AvgBl$Subject_ID <- as.factor(design.All.ChaussVac.AvgBl$Subject_ID)


design.PNEUMOvsSALINE.ChaussVac.AvgBl <- design.All.ChaussVac.AvgBl[which(design.All.ChaussVac.AvgBl$Vaccine!="influenza"), ]
design.PNEUMOvsSALINE.ChaussVac.AvgBl$Vaccine <- as.factor(as.character(design.PNEUMOvsSALINE.ChaussVac.AvgBl$Vaccine))

expr.PNEUMOvsSALINE.ChaussVac.AvgBl <- expr.All.ChaussVac.AvgBl[,design.PNEUMOvsSALINE.ChaussVac.AvgBl$sample_ID]

## ----LR_MT, message=FALSE, warning=FALSE--------------------------------------
tcgsa_result_MT <- TcGSA::TcGSA.LR(expr = expr.PNEUMOvsSALINE.ChaussVac.AvgBl, 
								   gmt = gmt_modulesV2,
								   design = design.PNEUMOvsSALINE.ChaussVac.AvgBl, 
								   subject_name = "Subject_ID", 
								   time_name = "Day", 
								   group_name = "Vaccine")
summary(tcgsa_result_MT)
head(TcGSA::signifLRT.TcGSA(tcgsa_result_MT)$mixedLRTadjRes)

## ----clust_MT, message=FALSE, warning=FALSE-----------------------------------
clust <- TcGSA::clustTrend(tcgs = tcgsa_result_MT, 
						   expr=tcgsa_result_MT$Estimations,
						   Subject_ID=design.PNEUMOvsSALINE.ChaussVac.AvgBl$Patient_ID,
						   TimePoint=design.PNEUMOvsSALINE.ChaussVac.AvgBl$Day,
						   group.var = design.PNEUMOvsSALINE.ChaussVac.AvgBl$Vaccine,
						   group_of_interest="pneumo",
						   ref="saline")
clust

## ----heatmap_MT, message=FALSE, results='asis'--------------------------------
plot(x=tcgsa_result_MT, expr=tcgsa_result_MT$Estimations,
	 Subject_ID=design.PNEUMOvsSALINE.ChaussVac.AvgBl$Patient_ID,
	 TimePoint=design.PNEUMOvsSALINE.ChaussVac.AvgBl$TimePoint,
	 group_of_interest="pneumo",
	 clust_trends=clust,
	 legend.breaks=seq(from=-2,to=2, by=0.01), time_unit="D",
	 subtitle="Pneumo vs Saline", cex.label.row=0.5, cex.label.col=1, cex.main=0.7,
	 heatmap.width=0.2, dendrogram.size=0.3, margins=c(2,3),
	 heatKey.size=0.8)

## ----dl_GEOquery, warning=FALSE, message=FALSE, eval=FALSE--------------------
#  if (!requireNamespace("GEOquery", quietly = TRUE)) {
#  	if (!requireNamespace("BiocManager", quietly = TRUE)){
#  		install.packages("BiocManager")
#  	}
#  	BiocManager::install("GEOquery")
#  }

Try the TcGSA package in your browser

Any scripts or data that you put into this service are public.

TcGSA documentation built on March 18, 2022, 5:53 p.m.