Nothing
# The NanoStringNorm package is copyright (c) 2012 Ontario Institute for Cancer Research (OICR)
# This package and its accompanying libraries is free software; you can redistribute it and/or modify it under the terms of the GPL
# (either version 1, or at your option, any later version) or the Artistic License 2.0. Refer to LICENSE for the full license text.
# OICR makes no representations whatsoever as to the SOFTWARE contained herein. It is experimental in nature and is provided WITHOUT
# WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE OR ANY OTHER WARRANTY, EXPRESS OR IMPLIED. OICR MAKES NO REPRESENTATION
# OR WARRANTY THAT THE USE OF THIS SOFTWARE WILL NOT INFRINGE ANY PATENT OR OTHER PROPRIETARY RIGHT.
# By downloading this SOFTWARE, your Institution hereby indemnifies OICR against any loss, claim, damage or liability, of whatsoever kind or
# nature, which may arise from your Institution's respective use, handling or storage of the SOFTWARE.
# If publications result from research using this SOFTWARE, we ask that the Ontario Institute for Cancer Research be acknowledged and/or
# credit be given to OICR scientists, as scientifically appropriate.
sample.content.normalization <- function(x, anno, SampleContent = 'none', SampleContent.summary.target = NA, across.samples = TRUE, logged = FALSE, verbose = TRUE) {
# check if missing
if (is.na(SampleContent)) {
stop('SampleContent: SampleContent normalization method cannot be missing. Try setting to *none*');
}
# Sample Content Normalization
if (SampleContent != 'none') {
# get list of endogenous probes excluding viral genes from normalization factors due to potential associatio with phenotype
endogenous.genes <- grepl('Endogenous', anno$Code.Class) & !grepl('bkv-|ebv-|hcmv-|hiv1-|hsv1-|hsv2-|kshv-|mcv-',anno$Name);
# Check for Endogenous: take sum of housekeeping genes.
if (SampleContent == 'housekeeping.sum') {
rna.content <- apply(
X = x[anno$Code.Class %in% c('Control', 'Housekeeping', 'housekeeping'),,drop=FALSE ],
MARGIN = 2,
FUN = sum
);
}
else if (SampleContent == 'housekeeping.geo.mean') {
# take the geometric mean of the housekeeping genes.
rna.content <- apply(
X = x[anno$Code.Class %in% c('Control', 'Housekeeping', 'housekeeping'),,drop=FALSE ],
MARGIN = 2,
FUN = get.geo.mean,
logged = logged
);
}
# take mean of all counts
else if (SampleContent == 'total.sum') {
rna.content <- apply(
X = x[endogenous.genes,,drop=FALSE],
MARGIN = 2,
FUN = sum
);
}
# take geometric mean of any endogenous or HK genes that seem stable
else if (SampleContent == 'low.cv.geo.mean') {
# get a list of expressed stable genes
gene.summary.stats <- as.data.frame(get.gene.summary.stats(x, anno));
low.cv.genes <- (
grepl('endogenous|housekeeping|control', anno$Code.Class, ignore.case=TRUE) &
# gene.summary.stats$Mean > 10 &
gene.summary.stats$Mean > quantile(gene.summary.stats$Mean, 0.5, na.rm = TRUE) &
gene.summary.stats$CV < quantile(gene.summary.stats$CV, 0.5, na.rm = TRUE)
);
low.cv.genes.rank.lt10 <- rank(gene.summary.stats[low.cv.genes,'CV'], ties.method = 'random') <= 10;
if (verbose) {
cat('SampleContent: The following genes have been chosen to adjust for RNA Content.\n\n');
print(data.frame("HK.Candidates" = rownames(x[low.cv.genes,][low.cv.genes.rank.lt10,])));
cat('\n');
}
rna.content <- apply(
X = x[low.cv.genes,][low.cv.genes.rank.lt10,,drop=FALSE],
MARGIN = 2,
FUN = get.geo.mean,
logged = logged
);
}
else if (SampleContent == 'top.mean') {
# sum each RNA
sum.rna <- apply(
X = x[endogenous.genes,,drop=FALSE],
MARGIN = 1,
FUN = sum
);
# get the ranks of the RNA sums
rank.rna <- (length(sum.rna) + 1) - rank(sum.rna, ties.method = 'first');
rna.content <- apply(
X = x[endogenous.genes, ][rank.rna <= 75,,drop=FALSE],
MARGIN = 2,
FUN = mean,
na.rm = TRUE
);
}
else if (SampleContent == 'top.geo.mean') {
# sum each RNA
sum.rna <- apply(
X = x[endogenous.genes,,drop=FALSE],
MARGIN = 1,
FUN = sum
);
# get the ranks of the RNA sums
rank.rna <- (length(sum.rna) + 1) - rank(sum.rna, ties.method = 'first');
rna.content <- apply(
X = x[endogenous.genes,,drop=FALSE][rank.rna <= 75,,drop=FALSE],
MARGIN = 2,
FUN = get.geo.mean,
logged = logged
);
}
else {
stop('SampleContent: Unimplemented SampleContent method');
}
# handle cases where rna.content is estimated to be zero
if (any(rna.content < 1)) {
if (verbose) {
# print warning
cat('SampleContent: The following samples have estimates of zero RNA content. Consider removing them.\n\n');
print(names(rna.content[rna.content < 1]));
cat('\n');
}
# fudge rna.content to the expected minimum of 1.
rna.content[rna.content < 1] <- 1;
}
# check that the target housekeeping control count is an expected format
if(!is.na(SampleContent.summary.target) & !is.numeric(SampleContent.summary.target)) {
SampleContent.summary.target <- NA;
warning('The target housekeeping control value SampleContent.summary.target must be NA or a numeric. Setting it to NA.')
}
# if a target housekeeping control count is not provided, calculate the normalization factor as the ratio of mean vs sample values
if(is.na(SampleContent.summary.target)) {
sampleContent.norm.factor <- mean(rna.content) / rna.content;
# Otherwise use the provided count to calculate a normalization factor
} else {
sampleContent.norm.factor <- SampleContent.summary.target / rna.content;
}
# flag any samples that are outliers
rna.content.sd.from.mean <- data.frame(rna.zscore = (rna.content - mean(rna.content)) / sd(rna.content));
if (verbose & any(abs(rna.content.sd.from.mean) > 3)) {
cat('SampleContent: The following samples have sample/rna content greater than \n\t3 standard deviations from the mean.\n\n');
print(signif(subset(rna.content.sd.from.mean, abs(rna.content.sd.from.mean) > 3),3));
cat('\n');
}
# adjust the data based on the normalization factor
x <- t(apply(
X = x,
MARGIN = 1,
FUN = '*',
sampleContent.norm.factor
));
}
return(
list(
x = x,
sampleContent.norm.factor = sampleContent.norm.factor,
rna.content = rna.content
)
);
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.