inst/doc/GCSscore.R

### R code from vignette source 'GCSscore.Rnw'

###################################################
### code chunk number 1: loadGCSscore
###################################################
library(GCSscore)


###################################################
### code chunk number 2: GCSscore.Rnw:56-62
###################################################
# get the path to example CEL files in the package directory:
celpath1 <- system.file("extdata/","MN_2_3.CEL", package = "GCSscore")
celpath2 <- system.file("extdata/","MN_4_1.CEL", package = "GCSscore")

# run GCSscore() function directly on the two .CEL files above:
GCSs.single <- GCSscore(celFile1 = celpath1, celFile2 = celpath2)


###################################################
### code chunk number 3: GCSscore.Rnw:75-90
###################################################
# view class of output:
class(GCSs.single)[1]

# convert GCSscore single-run from ExpressionSet to data.table:
GCSs.single.dt <- 
  data.table::as.data.table(cbind(GCSs.single@featureData@data,
                                  GCSs.single@assayData[["exprs"]]))

# show all column names included in the output:
colnames(GCSs.single.dt)

# show simplified output of select columns and rows:
GCSs.single.dt[10000:10005,
              c("transcriptclusterid","symbol",
                "ref_id","Sscore")]


###################################################
### code chunk number 4: GCSscore.Rnw:96-104
###################################################
# get the path to example CSV file in the package directory:
celtab_path <- system.file("extdata",
                           "GCSs_batch_ex.csv", 
                           package = "GCSscore")
# read in the .CSV file with fread():
celtab <- data.table::fread(celtab_path)
# view structure of 'celTable' input:
celtab


###################################################
### code chunk number 5: GCSscore.Rnw:109-112
###################################################
path <- system.file("extdata", package = "GCSscore")
celtab$CelFile1 <- celtab[,paste(path,CelFile1,sep="/")]
celtab$CelFile2 <- celtab[,paste(path,CelFile2,sep="/")]


###################################################
### code chunk number 6: GCSscore.Rnw:117-119
###################################################
# run GCSscore using using all info from the batch file:
GCSs.batch <- GCSscore(celTable = celtab, celTab.names = TRUE)


###################################################
### code chunk number 7: GCSscore.Rnw:124-140
###################################################
# view class of output:
class(GCSs.batch)[1]

# converting GCS-score output from'ExpressionSet' to 'data.table':
GCSs.batch.dt <-
  data.table::as.data.table(cbind(GCSs.batch@featureData@data,
                                  GCSs.batch@assayData[["exprs"]]))

# show all column names included in the output:
colnames(GCSs.batch.dt)

# show simplified output of select columns and rows:
GCSs.batch.dt[10000:10005,
              c("transcriptclusterid","symbol",
                "example01","example02","example03")]



###################################################
### code chunk number 8: GCSscore.Rnw:154-157
###################################################
GEO <- "GSE103380"
dir.geo <- paste(tempdir(),GEO,sep="/")
dir.create(dir.geo, showWarnings = FALSE)


###################################################
### code chunk number 9: GCSscore.Rnw:174-183
###################################################
list.cels <- c("GSM2769665","GSM2769666","GSM2769667","GSM2769668",
               "GSM2769669","GSM2769670","GSM2769671","GSM2769672")

# create function for pulling down the compressed .CEL files:
cels.get <- function(x)  
  GEOquery::getGEOSuppFiles(GEO = x,
                            makeDirectory = FALSE,
                            baseDir = dir.geo,
                            filter_regex = "*.CEL.gz")


###################################################
### code chunk number 10: GCSscore.Rnw:186-198
###################################################
lapply(list.cels,cels.get)
files.geo <- paste(dir.geo,list.files(path=dir.geo, 
                                      pattern = ".gz"),sep="/")

# create function to gunzip the compressed data files:
fun.gunzip <- function(x) 
  R.utils::gunzip(filename = x,
                  overwrite=TRUE,
                  remove=FALSE)

# apply the gunzip function across the vector of compressed CEL files:
lapply(files.geo,fun.gunzip)


###################################################
### code chunk number 11: GCSscore.Rnw:203-223
###################################################
celtab_path <- system.file("extdata",
                           "GSE103380_batch.csv", 
                           package = "GCSscore")

# read in the .CSV file with fread():
celtab <- data.table::fread(celtab_path)

# adds path to celFile names in batch input:
# NOTE: this is not necessary if the .CEL files 
#       are in the working directory:
celtab$CelFile1 <- celtab[,paste(dir.geo,CelFile1,sep="/")]
celtab$CelFile2 <- celtab[,paste(dir.geo,CelFile2,sep="/")]

# run GCSscore using using all info from the batch file:
GCSs.GSE103380 <- GCSscore(celTable = celtab, celTab.names = TRUE)

# convert GCS-score output from'ExpressionSet' to 'data.table':
GCSs.GSE103380.dt <- 
data.table::as.data.table(cbind(GCSs.GSE103380@featureData@data,
                                GCSs.GSE103380@assayData[["exprs"]]))


###################################################
### code chunk number 12: GCSscore.Rnw:226-239
###################################################
GCSs.GSE103380.dt[, day4_1_vs_naive := 
                    rowMeans(GCSs.GSE103380.dt[
                      ,day4_1_vs_naive_1:day4_1_vs_naive_4])]
GCSs.GSE103380.dt[, day4_2_vs_naive := 
                    rowMeans(GCSs.GSE103380.dt[
                      ,day4_2_vs_naive_1:day4_1_vs_naive_4])]
GCSs.GSE103380.dt[, day4_3_vs_naive := 
                    rowMeans(GCSs.GSE103380.dt[
                      ,day4_3_vs_naive_1:day4_1_vs_naive_4])]
GCSs.GSE103380.dt[, day4_4_vs_naive := 
                    rowMeans(GCSs.GSE103380.dt[
                      ,day4_4_vs_naive_1:day4_1_vs_naive_4])]



###################################################
### code chunk number 13: GCSscore.Rnw:244-251
###################################################
GCSs.GSE103380.dt <- GCSs.GSE103380.dt[!is.na(symbol)]
# Set the SAM 'gene.names' to be either ('TCid' or 'symbol'):
GCSs.GSE103380.dt.SAM <- 
  siggenes::sam(GCSs.GSE103380.dt[
    ,day4_1_vs_naive:day4_4_vs_naive],
    cl = rep(1,4),rand=123,
    gene.names = GCSs.GSE103380.dt$symbol)


###################################################
### code chunk number 14: GCSscore.Rnw:254-264
###################################################
GCSs.GSE103380.dt.SAM

# create name and path of SAM results:
sam.path <- paste(dir.geo,
                  "GSE103380_ex_SAM_16_6.csv",
                  sep = "/")
# Save TCids with delta >= 16.6:
siggenes::sam2excel(GCSs.GSE103380.dt.SAM,
                    delta = 16.6, 
                    file = sam.path)


###################################################
### code chunk number 15: GCSscore.Rnw:267-271
###################################################
sam.results <- data.table::fread(sam.path)

# View the top 10 DEGs output by SAM:
head(sam.results,10)

Try the GCSscore package in your browser

Any scripts or data that you put into this service are public.

GCSscore documentation built on Nov. 8, 2020, 7:47 p.m.