# Artistic License 2.0
#
# Copyright (c) 2015, Ning Leng.
#
# Everyone is permitted to copy and distribute verbatim copies of this
# license document, but changing it is not allowed.
#
# Preamble
# ********
#
# This license establishes the terms under which a given free software
# Package may be copied, modified, distributed, and/or redistributed. The
# intent is that the Copyright Holder maintains some artistic control over
# the development of that Package while still keeping the Package
# available as open source and free software.
#
# You are always permitted to make arrangements wholly outside of this
# license directly with the Copyright Holder of a given Package. If the
# terms of this license do not permit the full use that you propose to
# make of the Package, you should contact the Copyright Holder and seek a
# different licensing arrangement.
#
# Definitions
# ***********
#
# "Copyright Holder" means the individual(s) or organization(s) named in
# the copyright notice for the entire Package.
#
# "Contributor" means any party that has contributed code or other
# material to the Package, in accordance with the Copyright Holder's
# procedures.
#
# "You" and "your" means any person who would like to copy, distribute, or
# modify the Package.
#
# "Package" means the collection of files distributed by the Copyright
# Holder, and derivatives of that collection and/or of those files. A
# given Package may consist of either the Standard Version, or a Modified
# Version.
#
# "Distribute" means providing a copy of the Package or making it
# accessible to anyone else, or in the case of a company or organization,
# to others outside of your company or organization.
#
# "Distributor Fee" means any fee that you charge for Distributing this
# Package or providing support for this Package to another party. It does
# not mean licensing fees.
#
# "Standard Version" refers to the Package if it has not been modified, or
# has been modified only in ways explicitly requested by the Copyright
# Holder.
#
# "Modified Version" means the Package, if it has been changed, and such
# changes were not explicitly requested by the Copyright Holder.
#
# "Original License" means this Artistic License as Distributed with the
# Standard Version of the Package, in its current version or as it may be
# modified by The Perl Foundation in the future.
#
# "Source" form means the source code, documentation source, and
# configuration files for the Package.
#
# "Compiled" form means the compiled bytecode, object code, binary, or any
# other form resulting from mechanical transformation or translation of
# the Source form.
#
# Permission for Use and Modification Without Distribution
# ********************************************************
#
# (1) You are permitted to use the Standard Version and create and use
# Modified Versions for any purpose without restriction, provided that you
# do not Distribute the Modified Version.
#
# Permissions for Redistribution of the Standard Version
# ******************************************************
#
# (2) You may Distribute verbatim copies of the Source form of the
# Standard Version of this Package in any medium without restriction,
# either gratis or for a Distributor Fee, provided that you duplicate all
# of the original copyright notices and associated disclaimers. At your
# discretion, such verbatim copies may or may not include a Compiled form
# of the Package.
#
# (3) You may apply any bug fixes, portability changes, and other
# modifications made available from the Copyright Holder. The resulting
# Package will still be considered the Standard Version, and as such will
# be subject to the Original License.
#
# Distribution of Modified Versions of the Package as Source
# **********************************************************
#
# (4) You may Distribute your Modified Version as Source (either gratis or
# for a Distributor Fee, and with or without a Compiled form of the
# Modified Version) provided that you clearly document how it differs from
# the Standard Version, including, but not limited to, documenting any
# non-standard features, executables, or modules, and provided that you do
# at least ONE of the following:
#
# (a) make the Modified Version available to the Copyright Holder of the
# Standard Version, under the Original License, so that the Copyright
# Holder may include your modifications in the Standard Version.
#
# (b) ensure that installation of your Modified Version does not prevent
# the user installing or running the Standard Version. In addition, the
# Modified Version must bear a name that is different from the name of the
# Standard Version.
#
# (c) allow anyone who receives a copy of the Modified Version to make the
# Source form of the Modified Version available to others under
#
# (i) the Original License or
#
# (ii) a license that permits the licensee to freely copy, modify and
# redistribute the Modified Version using the same licensing terms that
# apply to the copy that the licensee received, and requires that the
# Source form of the Modified Version, and of any works derived from it,
# be made freely available in that license fees are prohibited but
# Distributor Fees are allowed.
#
# Distribution of Compiled Forms of the Standard Version or Modified
# ******************************************************************
# Versions without the Source
# ***************************
#
# (5) You may Distribute Compiled forms of the Standard Version without
# the Source, provided that you include complete instructions on how to
# get the Source of the Standard Version. Such instructions must be valid
# at the time of your distribution. If these instructions, at any time
# while you are carrying out such distribution, become invalid, you must
# provide new instructions on demand or cease further distribution. If
# you provide valid instructions or cease distribution within thirty days
# after you become aware that the instructions are invalid, then you do
# not forfeit any of your rights under this license.
#
# (6) You may Distribute a Modified Version in Compiled form without the
# Source, provided that you comply with Section 4 with respect to the
# Source of the Modified Version.
#
# Aggregating or Linking the Package
# **********************************
#
# (7) You may aggregate the Package (either the Standard Version or
# Modified Version) with other packages and Distribute the resulting
# aggregation provided that you do not charge a licensing fee for the
# Package. Distributor Fees are permitted, and licensing fees for other
# components in the aggregation are permitted. The terms of this license
# apply to the use and Distribution of the Standard or Modified Versions
# as included in the aggregation.
#
# (8) You are permitted to link Modified and Standard Versions with other
# works, to embed the Package in a larger work of your own, or to build
# stand-alone binary or bytecode versions of applications that include the
# Package, and Distribute the result without restriction, provided the
# result does not expose a direct interface to the Package.
#
# Items That are Not Considered Part of a Modified Version
# ********************************************************
#
# (9) Works (including, but not limited to, modules and scripts) that
# merely extend or make use of the Package, do not, by themselves, cause
# the Package to be a Modified Version. In addition, such works are not
# considered parts of the Package itself, and are not subject to the terms
# of this license.
#
# General Provisions
# ******************
#
# (10) Any use, modification, and distribution of the Standard or Modified
# Versions is governed by this Artistic License. By using, modifying or
# distributing the Package, you accept this license. Do not use, modify,
# or distribute the Package, if you do not accept this license.
#
# (11) If your Modified Version has been derived from a Modified Version
# made by someone other than you, you are nevertheless required to ensure
# that your Modified Version complies with the requirements of this
# license.
#
# (12) This license does not grant you the right to use any trademark,
# service mark, tradename, or logo of the Copyright Holder.
#
# (13) This license includes the non-exclusive, worldwide, free-of-charge
# patent license to make, have made, use, offer to sell, sell, import and
# otherwise transfer the Package with respect to any patent claims
# licensable by the Copyright Holder that are necessarily infringed by the
# Package. If you institute patent litigation (including a cross-claim or
# counterclaim) against any party alleging that the Package constitutes
# direct or contributory patent infringement, then this Artistic License
# to you shall terminate on the date that such litigation is filed.
#
# (14) Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT
# HOLDER AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT
# PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT
# HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE
# OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#' @title Calculate estimated mean and variance of RNA-Seq data
#' @usage CalcMV(Data, Sizes=NULL, NormData=FALSE, MeanCutLow=100, MeanCutHigh=NULL, ApproxVal=10^-6, Plot=TRUE)
#' @param Data input data matrix with row names; it should be a gene-by-sample or
#' isoform-by-sample matrix.
#' @param Sizes The library size factor for each sample. The number of values in Sizes must be the
#' same as the number of columns of Data. The library size factors will be estimated using the median
#' normalization method implemented in EBSeq (MedianNorm) if Sizes is specified as NULL.
#' @param MeanCutLow,MeanCutHigh we suggest that users apply Oscope on genes with high mean and
#' high variance. By default, MeanCutLow is specified as 100, consequently only genes with mean > 100
#' will be used. The CalcMV function will fit a linear regression of log10(variance) on log10(mean)
#' using these genes. Genes with variance above this line are considered as the high mean high
#' variance genes. The upper bound of the mean may be specified using MeanCutHigh (genes with
#' mean < MeanCutHigh are kept). Each bound is applied independently of the other. If both are
#' specified as NULL, all of the genes will be considered when fitting the regression.
#' @param NormData whether the data is already normalized. If NormData=TRUE, the specification of Sizes
#' will be ignored (all size factors are taken to be 1) and no normalization will be applied.
#' @param ApproxVal Default is 10^-6. It is used to approximate the estimate of parameter q for genes/isoforms
#' whose estimated variance is less than or equal to the estimated mean (q >= 1). For those
#' genes/isoforms q will be set to 1-ApproxVal.
#' @param Plot if Plot = TRUE, a mean-variance plot will be shown. The fitted line will be shown and the
#' selected genes will be marked in green.
#' @return A list with 7 elements, returned invisibly: Mean: estimated means of genes/isoforms;
#' Var: estimated variances; Median: estimated medians;
#' GeneToUse: the high mean high variance genes (suggested input for Oscope);
#' Q: estimated q's (without approximation); Q_mdf: estimated q's with approximations;
#' Phi_mdf: estimated overdispersion parameter (phi), with approximations.
#' @examples
#' ExpMat <- matrix(rnorm(100,1000,10),ncol=10)
#' rownames(ExpMat) <- paste0("g",1:10)
#' CalcMV(ExpMat)
#' @author Ning Leng
CalcMV <- function(Data, Sizes=NULL, NormData=FALSE, MeanCutLow=100, MeanCutHigh=NULL, ApproxVal=10^-6, Plot=TRUE){
  # ---- Input validation (base R; no test-framework calls at run time) ----
  if (!is.matrix(Data)) {
    stop("Data must be a matrix.")
  }
  if (!is.character(rownames(Data))) {
    stop("Data must have row names.")
  }

  # ---- Library size factors ----
  if (isTRUE(NormData)) {
    # Data is already normalized: Sizes is ignored, as documented, so every
    # sample gets a size factor of 1 in the variance computation below.
    EmpSizes <- rep(1, ncol(Data))
  } else if (is.null(Sizes)) {
    EmpSizes <- MedianNorm(Data)
  } else {
    EmpSizes <- Sizes
  }
  if (!is.numeric(EmpSizes) || length(EmpSizes) != ncol(Data)) {
    stop("Sizes must be a numeric vector with one value per column of Data.")
  }

  # ---- Normalized mean and median of every gene ----
  NormMat <- if (isTRUE(NormData)) Data else t(t(Data) / EmpSizes)
  MeansC1 <- rowMeans(NormMat)
  MedC1 <- apply(NormMat, 1, median)

  # ---- Variance ----
  # Squared deviation of each raw count from its expected value
  # (gene mean x sample size factor), rescaled by the size factor.
  SqDev <- (Data - outer(MeansC1, EmpSizes))^2
  VarC1 <- rowMeans(t(t(SqDev) / EmpSizes))

  # ---- q and phi ----
  QC1 <- MeansC1 / VarC1
  # Genes with mean >= var give q >= 1; approximate q by 1 - ApproxVal.
  # which() is used so that NA/NaN entries are left untouched.
  QNB <- QC1
  QNB[which(QNB >= 1)] <- 1 - ApproxVal
  PhiNB <- (1 - QNB) / (MeansC1 * QNB)

  # ---- Select genes by mean; each bound is applied on its own ----
  Keep <- rep(TRUE, length(MeansC1))
  if (!is.null(MeanCutLow)) {
    Keep <- Keep & MeansC1 > MeanCutLow
  }
  if (!is.null(MeanCutHigh)) {
    Keep <- Keep & MeansC1 < MeanCutHigh
  }
  Which <- which(Keep)
  if (length(Which) < 3) stop("Too few genes are selected based on the settings of MeanCutHigh and MeanCutLow!")

  # Index by position, not by name, so duplicated row names cannot pick the
  # wrong gene.
  MeanUse <- MeansC1[Which]
  VarUse <- VarC1[Which]

  # ---- Regress log10(variance) on log10(mean) ----
  Meanfit <- log10(MeanUse)
  Varfit <- log10(VarUse)
  # A zero variance (or non-positive mean) gives -Inf/NaN on the log scale,
  # which lm() rejects; such genes are left out of the fit only.
  Finite <- is.finite(Meanfit) & is.finite(Varfit)
  if (sum(Finite) < 2) {
    stop("Too few genes with positive, finite mean and variance to fit the mean-variance regression!")
  }
  FitX <- Meanfit[Finite]
  FitY <- Varfit[Finite]
  Coef <- coef(lm(FitY ~ FitX))

  # Fitted variance on the original scale; genes above the line are picked.
  Fit <- 10^(Coef[[1]] + Coef[[2]] * Meanfit)
  Picked <- which(VarUse - Fit > 0)
  SamplePickGenes <- names(MeanUse)[Picked]

  if (isTRUE(as.logical(Plot))) {
    plot(MeansC1, VarC1, col="gray", pch=21, xlab="Mean",
         ylab="Variance", log="xy")
    Ord <- order(MeanUse)
    lines(MeanUse[Ord], Fit[Ord])
    points(MeanUse[Picked], VarUse[Picked], pch=21, col="green")
    Cuts <- c(MeanCutLow, MeanCutHigh)
    if (length(Cuts) > 0) {
      abline(v=Cuts)
    }
  }

  out <- list(Mean=MeansC1, Var=VarC1, Median=MedC1, GeneToUse=SamplePickGenes,
              Q=QC1, Q_mdf=QNB, Phi_mdf=PhiNB)
  # Returned invisibly, as before, so an unassigned call does not print the
  # whole list to the console.
  invisible(out)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.