###########################
##### Normalization ######
###########################
library(nanostringr)
library(NanoStringNorm)
library(NAPPA)
library(nanoR)
#' Normalize a Nanostring dataset
#'
#' Normalizes raw Nanostring counts with one of several methods provided by
#' the NAPPA, NanoStringNorm, nanostringr and nanoR packages.
#'
#' @param raw.data List describing the dataset. The function reads the
#' following elements:
#' \itemize{
#'   \item \code{samples.IDs}: dataframe of sample annotations. The columns
#'   read by this function are \code{ID} (column names of the result for the
#'   NAPPA and nanostringr tools), \code{title} (column names of the result
#'   for the nanoR tools) and \code{tp53.status} (design matrix of
#'   "nanostringnorm.param2").
#'   \item \code{rcc.df}: expression levels with samples in columns and genes
#'   in rows.
#'   \item \code{annots.df}: dataframe with 3 columns: gene class
#'   (\code{CodeClass}: endogenous, housekeeping, negative, positive), gene
#'   name, and accession number.
#'   \item \code{FOV}: dataframe that contains the Field Of View information.
#' }
#'
#' @param tool Normalization tool, one of:
#' \itemize{
#'   \item "nappa.NS", "nappa.default", "nappa.param1", "nappa.param2",
#'   "nappa.param3": different parameter sets for the NAPPA() function of the
#'   NAPPA package.
#'   \item "nanostringnorm.default", "nanostringnorm.param1",
#'   "nanostringnorm.param2": the NanoStringNorm() function of the
#'   NanoStringNorm package.
#'   \item "nanostringR": the HKnorm() function of the nanostringr package.
#'   \item "nanoR.top100", "nanoR.total": the nsNormalize() function of the
#'   nanoR package. These tools read the rcc files from \code{dir}.
#' }
#' Any other value raises an error.
#'
#' @param nanoR Logical value. Must be TRUE when the tool is "nanoR.top100"
#' or "nanoR.total" (an error is raised otherwise). Defaults to FALSE.
#' @param dir Directory of the rcc files. Only used when \code{nanoR} is TRUE.
#' When it is not supplied (NULL), the directory
#' "./DATA/NANOSTRING/GSE146204_RAW" is used.
#'
#' @return Normalized expression values, as returned by the selected
#' normalization function (columns renamed after the samples for the NAPPA,
#' nanostringr and nanoR tools).
#'
#' @import "nanostringr" "NanoStringNorm" "NAPPA" "nanoR"
#' @export
#'
#' @examples
#' # Retrieve Nanostring Data
#' # Data = Simul.data(type = "Nanostring")
#' # Normalize data using one method :
#' # Norm.data = tools.norm.Nanostring(raw.data = Data, tool = "nappa.NS", nanoR = FALSE)
#' # With nanoR : give the rcc files location
#' # RCC.dir <- file.path("./DATA/NANOSTRING", "GSE146204_RAW")
#' # Norm.data = tools.norm.Nanostring(raw.data = Data, tool = "nanoR.top100", dir = RCC.dir, nanoR = TRUE)
tools.norm.Nanostring <- function(raw.data, tool, nanoR = FALSE, dir = NULL) {
  # The nanoR tools parse the rcc files themselves, so they cannot run
  # without the nanoR flag that enables the directory lookup below.
  if (!nanoR && tool %in% c("nanoR.top100", "nanoR.total")) {
    stop("tool '", tool, "' requires nanoR = TRUE", call. = FALSE)
  }

  # Tables used by the NAPPA / NanoStringNorm / nanostringr tools.
  samples.IDs <- raw.data$samples.IDs
  rcc.samples <- raw.data$rcc.df
  annots.df <- raw.data$annots.df
  FOV <- raw.data$FOV

  # Directory handed to parseRCC(); falls back to the default dataset
  # location when the caller did not supply one.
  rcc.dir <- NULL
  if (nanoR) {
    rcc.dir <- if (is.null(dir)) {
      file.path("./DATA/NANOSTRING", "GSE146204_RAW")
    } else {
      dir
    }
  }

  # Build the NAPPA input: annotations + counts (optionally preceded by the
  # FOV rows), with the housekeeping rows appended a second time relabelled
  # as "Endogenous".
  prepare.nappa <- function(with.fov = TRUE) {
    counts <- cbind(annots.df, rcc.samples)
    if (with.fov) {
      counts <- rbind(FOV, counts)
    }
    hk <- counts[grep("Housekeeping", counts$CodeClass), ]
    hk$CodeClass <- "Endogenous"
    rbind(counts, hk)
  }

  # Build the NanoStringNorm input: annotations + counts with the
  # housekeeping rows relabelled (in place) as "Endogenous".
  prepare.nanostringnorm <- function() {
    counts <- cbind(annots.df, rcc.samples)
    counts$CodeClass[counts$CodeClass == "Housekeeping"] <- "Endogenous"
    counts
  }

  # Name the columns of a normalized table after the sample IDs.
  label.samples <- function(norm) {
    colnames(norm) <- samples.IDs$ID
    norm
  }

  # Full nanoR pipeline: parse, background-correct, positive-control
  # normalize, normalize with `method`, drop the control probes and the
  # three annotation columns, then take log2(x + 1).
  run.nanoR <- function(method) {
    nano <- parseRCC(dir = rcc.dir)
    nano <- nsBackgroundCorrect(nano)
    nano <- nsPositiveControlNormalization(nano)
    counts <- nsNormalize(nano, method = method)$bg.corr.counts
    counts <- log2(counts[!grepl("Neg|Pos", counts$CodeClass), -c(1:3)] + 1)
    colnames(counts) <- samples.IDs$title
    counts
  }

  res.norm <- switch(tool,
    nanostringR = {
      counts <- cbind(annots.df, rcc.samples)
      # HKnorm() is given these column names for the annotation columns
      colnames(counts)[1:3] <- c("Code.Class", "Name", "Accession")
      norm <- HKnorm(counts)
      row.names(norm) <- norm$Name
      label.samples(norm[, -(1:3)])
    },
    nappa.NS = label.samples(
      NAPPA(prepare.nappa(with.fov = FALSE), tissueType = "tumour",
            nposcontrols = 6, scaleFOV = FALSE, output = "All",
            raise.low.counts = 1, poscontrol.method = "geometric.mean",
            background.method = "none",
            hk.method = "subtract")$GeneExpression
    ),
    # Uses NAPPA's default output as returned (no $GeneExpression extraction)
    nappa.default = label.samples(
      NAPPA(prepare.nappa(), tissueType = "tumour")
    ),
    nappa.param1 = label.samples(
      NAPPA(prepare.nappa(), tissueType = "tumour", nposcontrols = 6,
            scaleFOV = TRUE, output = "All",
            raise.low.counts = 25)$GeneExpression
    ),
    nappa.param3 = label.samples(
      NAPPA(prepare.nappa(), tissueType = "tumour", nposcontrols = 6,
            scaleFOV = TRUE, output = "All",
            raise.low.counts = 1)$GeneExpression
    ),
    nappa.param2 = label.samples(
      NAPPA(prepare.nappa(), tissueType = "tumour", sampleNumber = 10,
            hk.method = "shrunken.subtract", nposcontrols = 6,
            scaleFOV = TRUE, output = "All",
            raise.low.counts = 25)$GeneExpression
    ),
    nanostringnorm.default = NanoStringNorm(
      prepare.nanostringnorm(),
      take.log = TRUE,
      return.matrix.of.endogenous.probes = TRUE
    ),
    nanostringnorm.param1 = NanoStringNorm(
      prepare.nanostringnorm(),
      CodeCount = 'geo.mean', Background = 'mean.2sd',
      SampleContent = 'low.cv.geo.mean', round.values = TRUE,
      take.log = TRUE, return.matrix.of.endogenous.probes = TRUE
    ),
    nanostringnorm.param2 = {
      # One indicator column per tp53 status, passed as traits
      design <- model.matrix(~0 + samples.IDs$tp53.status)
      colnames(design) <- c("Mutated", "WildType")
      NanoStringNorm(
        prepare.nanostringnorm(),
        Background = 'mean.2sd', SampleContent = 'low.cv.geo.mean',
        CodeCount = "sum", traits = design, round.values = TRUE,
        take.log = TRUE, return.matrix.of.endogenous.probes = TRUE
      )
    },
    nanoR.top100 = run.nanoR("top100"),
    nanoR.total = run.nanoR("total"),
    stop("Enter something that switches me!")
  )
  res.norm
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.