## ----knitr, cache = F, results = "hide", echo = F, warning = T----------------
# save options
options.backup <- options()
# change line wrap to something more readable
options(width = 80)
# Set the global knitr options
knitr::opts_chunk$set(cache = F,
results = "hide",
echo = T,
include = T,
message = T,
warning = T,
fig.width = 9,
fig.height = 6)
## ----initiate-----------------------------------------------------------------
# start time measurement
time.start <- Sys.time()
# load required packages
for (i in c(
"HT12ProcessoR",
"toolboxH",
"lumi",
"knitr",
"here",
"data.table",
"evaluate",
"plotly",
"ggplot2",
"pander"
)) {
suppressPackageStartupMessages(library(i, character.only = TRUE))
}
# set some basic options
panderOptions('table.split.table', Inf)
options(stringsAsFactors=FALSE)
## ----prepare.examples, echo = T-----------------------------------------------
# check the root of your project
# here::dr_here(show_reason = T) # the here package makes the navigation in directories easier but works best from within R-Projects
# get the project root directory
# prepro.folder <- here()
# setwd(/path/to/project)
prepro.folder <- getwd() # please choose a working directory
# create the object with the names of the input files ##
# IDs can be chosen freely, but using terms identifying the data files might be advisable
my.fileset_id <- c('590-651', 'HSS106-HSS155')
# create the results-folder if necessary
dir.create(paste0(prepro.folder, "/tosend/"))
# also create an obj/ folder for intermediary results
dir.create(paste0(prepro.folder, "/obj/"))
# control probe data file
my.con_f <- c(system.file("extdata", "Run12_ControlProbeProfile_590-651.txt", package = "HT12ProcessoR"), # control probe measurements + ercc (spike-in -- class of control probes)
system.file("extdata", "ControlProbeProfile_HSS106-HSS155.txt", package = "HT12ProcessoR")) # second example (processing batch 1 & 2)
# gene probe file
my.nobkgd_f <- c(system.file("extdata", "Run12_nonorm_nobkgd_590-651.txt", package = "HT12ProcessoR"), # raw probe data of human gene expression
system.file("extdata", "Einzelanalyse_nonorm_nobkgd_HSS106-HSS155.txt", package = "HT12ProcessoR"))
# sample file
my.sample_f <- c(system.file("extdata", "Run12_SamplesTable_590-651.txt", package = "HT12ProcessoR"), # sample annotation (each measured indiviuum)
system.file("extdata", "SamplesTable_HSS106-HSS155.txt", package = "HT12ProcessoR"))
# consolidate in input-file
input <- data.table(fileset_id = my.fileset_id,
con_f = my.con_f,
nobkgd_f = my.nobkgd_f,
sample_f = my.sample_f)
# how should the input file be called?
input_filename <- paste0(prepro.folder, "/obj/01_file_overview.txt")
# save the input file for later use
write.table(x = input,
file = input_filename,
quote = F,
col.names = T,
row.names = F,
sep = "\t")
## ----parameter.file-----------------------------------------------------------
myparams <- read.delim(system.file("extdata", "input_parameter_010.txt", package = "HT12ProcessoR"),
as.is = T)
## ----customize.parameter.file-------------------------------------------------
# where is the input file we created located?
myparams[myparams$variable == "file_names_of_files_and_folders", "value"] <- input_filename
# where should the results be saved?
myparams[myparams$variable == "datafolder_results", "value"] <- paste0(prepro.folder, "/tosend/")
# where should the file be saved (overwrite the previously read-in file)
myparamfile <- paste0(prepro.folder, "/obj/input_parameter_010.txt")
# save the file again with the above changes
write.table(myparams,
myparamfile,
quote = F,
col.names = T,
row.names = F,
sep="\t")
## ----pre.pro.step.1-----------------------------------------------------------
# execute function for the checks described above
prepro_ht12 <- checkExtractChipsamples(paramfile = myparamfile) # check the output of the function for details
# print object in HTML-report
pander(prepro_ht12$history)
# check the class of the object
class(prepro_ht12)
# another file preview
pander(ht(prepro_ht12$chipsamples, 1))
## ----pre.pro.step.2-----------------------------------------------------------
# preview first and last lines
pander(ht(prepro_ht12$chipsamples, 2))
# define samples to be excluded, e.g. when different projects are processed on the same chip
exclude.this <- c("614", "638", "HSS 108", "HSS 112", "HSS 115", "HSS 125", "HSS 127", "HSS 133", "HSS 137", "HSS 138", "HSS 146", "HSS 151","HSS 155")
# Exclude Samples by modifying the $chipsamples element
prepro_ht12$chipsamples <- prepro_ht12$chipsamples[!prepro_ht12$chipsamples$old_ID %in% exclude.this, ]
# Define Subgroups - in this example, sampels are from Peripheral Blood Mononuclear Cells (PBMC)
prepro_ht12$chipsamples$subgroup <- "PBMC"
table(prepro_ht12$chipsamples$subgroup)
# show which IDs were processed in which batch
xtabs(~prepro_ht12$chipsamples$Sentrix.Barcode + prepro_ht12$chipsamples$processingbatch)
# do we have strange batches on top of regular batch structure
xtabs(~prepro_ht12$chipsamples$strangebatch + prepro_ht12$chipsamples$processingbatch)
## Quick Check for major differences based on Attributes from the provided ILLUMINA-sample-files
initial_pca <- calcInitialPCA(prepro_ht12)
# plot the results
pca.plot <- ggplot(initial_pca$scores, aes(PC1,
PC2,
pch = subgroup,
col = processingbatch,
size = Detected.Genes..0.01.,
alpha = in_study,
label = paste(old_ID, new_ID, sep = "\n"))) +
geom_point() +
scale_alpha_manual(values = c(0.7, 0.3))
# plot a static version
pca.plot
# interactive visualisation - check homogeinity etc.
pca.plot %>% ggplotly()
## ----pre.pro.step.3-----------------------------------------------------------
# create the ExpressionSet
prepro_ht12 <- createExpressionSet(prepro_ht12, paramfile = myparamfile) # watch the function output
# explore file output
# show the command history
kable(prepro_ht12$history)
# preview expressionset including samples and controls
prepro_ht12$total_nobkgd_eset
# detection levels per probe
hh(lumi::detection(prepro_ht12$total_nobkgd_eset))
# expression lvl per probe
hh(lumi::exprs(prepro_ht12$total_nobkgd_eset))
## ----pre.pro.step.4-----------------------------------------------------------
# execute step 4 by running this function
prepro_ht12 <- filterLowExpressed(prepro_ht12, paramfile = myparamfile)
# show command history
kable(prepro_ht12$history)
# preview function output
head(prepro_ht12$genesdetail)
# show excluded samples
setDT(prepro_ht12$chipsamples)
prepro_ht12$chipsamples[,.N,.(in_study, reason4exclusion)]
# and reset class of this object
setDF(prepro_ht12$chipsamples)
## ----pre.pro.step.5-----------------------------------------------------------
# Transform and normalize the data with this function
prepro_ht12 <- transformNormalizeHT12object(prepro_ht12, paramfile = myparamfile)
# execute the filter step described above with this function
prepro_ht12 <- filterTechnicallyFailed(prepro_ht12, paramfile = myparamfile)
# show command execution history
kable(prepro_ht12$history)
## ----pre.pro.step.6-----------------------------------------------------------
# filter for batch-size
prepro_ht12 <- filter4MinBatchsize(prepro_ht12, paramfile = myparamfile)
# repeat transformation and normalization
# This is only necessary if samples were removed in previous step
prepro_ht12 <- transformNormalizeHT12object(prepro_ht12, paramfile = myparamfile)
# execute batch- adjustment via sva::ComBat()
prepro_ht12 <- removeBatchEffects(prepro_ht12, paramfile = myparamfile)
# review command history
kable(prepro_ht12$history)
## ----pre.pro.step.7-----------------------------------------------------------------------------------------------------------------------------------------------------
# check for remaining batch-effects
prepro_ht12 <- checkBatchEffects(prepro_ht12, paramfile = myparamfile)
# preview the newly created object
ht(prepro_ht12$genesdetail, 2)
# review command history
kable(prepro_ht12$history)
## ----pre.pro.step.8-----------------------------------------------------------------------------------------------------------------------------------------------------
# filter for atypical expression levels
prepro_ht12 <- filterAtypicalExpressed(prepro_ht12, paramfile = myparamfile)
# preview newly created output
ht(prepro_ht12$chipsamples, 2)
# review command history
kable(prepro_ht12$history)
## ----pre.pro.step.9-----------------------------------------------------------------------------------------------------------------------------------------------------
# create QC-plots
prepro_ht12 <- visualizePreprocessing(prepro_ht12, paramfile = myparamfile)
# PCA before/after preprocessing
prepro_ht12 <- comparePCBeforeAfterPrePro(prepro_ht12, paramfile = myparamfile)
# review command history
kable(prepro_ht12$history)
## ----pre.pro.step.10----------------------------------------------------------------------------------------------------------------------------------------------------
## Set parameter to use provided renaming file
renaming_fn <- system.file("extdata", "renamingsamples.txt", package = "HT12ProcessoR")
# edit parameter file with location of renaming-file
myparams[ myparams$variable == "file_renaming_samples_tosend", "value"] <- renaming_fn
# write the parameter file
write.table(myparams,
myparamfile,
quote = F,
col.names = T,
row.names = F,
sep="\t")
# write result files into the tosend/ folder
prepro_ht12 = writeFilesTosend(prepro_ht12, paramfile = myparamfile)
# preview first and last line of $chipsamples object
ht(prepro_ht12$chipsamples, 1)
# review command history
kable(prepro_ht12$history)
## ----save.rdata---------------------------------------------------------------------------------------------------------------------------------------------------------
# save an .RData object containing the final prepro_ht12 object
save(prepro_ht12, file = paste0(prepro.folder, "/obj/prepro_example1_HT12object.RData"))
## ----methods.text, results = "asis"-------------------------------------------------------------------------------------------------------------------------------------
# create text-object
mm_text <- createTextForMethods(prepro_ht12, paramfile = myparamfile)
# preview created text as a long version
cat('### Extended Version')
cat(mm_text$extended_version)
# preview created text as a short version
cat('### Short Version')
cat(mm_text$short_version)
## ----session.info, results = 'markup'-----------------------------------------------------------------------------------------------------------------------------------
# output script run time
total.time <- round(as.numeric(Sys.time() - time.start), 2)
message("This script ran for a total of ", total.time, " minutes.")
# output session info
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.