# Artistic License 2.0
#
# Copyright (c) 2015, Ning Leng.
#
# Everyone is permitted to copy and distribute verbatim copies of this
# license document, but changing it is not allowed.
#
# Preamble
# ********
#
# This license establishes the terms under which a given free software
# Package may be copied, modified, distributed, and/or redistributed. The
# intent is that the Copyright Holder maintains some artistic control over
# the development of that Package while still keeping the Package
# available as open source and free software.
#
# You are always permitted to make arrangements wholly outside of this
# license directly with the Copyright Holder of a given Package. If the
# terms of this license do not permit the full use that you propose to
# make of the Package, you should contact the Copyright Holder and seek a
# different licensing arrangement.
#
# Definitions
# ***********
#
# "Copyright Holder" means the individual(s) or organization(s) named in
# the copyright notice for the entire Package.
#
# "Contributor" means any party that has contributed code or other
# material to the Package, in accordance with the Copyright Holder's
# procedures.
#
# "You" and "your" means any person who would like to copy, distribute, or
# modify the Package.
#
# "Package" means the collection of files distributed by the Copyright
# Holder, and derivatives of that collection and/or of those files. A
# given Package may consist of either the Standard Version, or a Modified
# Version.
#
# "Distribute" means providing a copy of the Package or making it
# accessible to anyone else, or in the case of a company or organization,
# to others outside of your company or organization.
#
# "Distributor Fee" means any fee that you charge for Distributing this
# Package or providing support for this Package to another party. It does
# not mean licensing fees.
#
# "Standard Version" refers to the Package if it has not been modified, or
# has been modified only in ways explicitly requested by the Copyright
# Holder.
#
# "Modified Version" means the Package, if it has been changed, and such
# changes were not explicitly requested by the Copyright Holder.
#
# "Original License" means this Artistic License as Distributed with the
# Standard Version of the Package, in its current version or as it may be
# modified by The Perl Foundation in the future.
#
# "Source" form means the source code, documentation source, and
# configuration files for the Package.
#
# "Compiled" form means the compiled bytecode, object code, binary, or any
# other form resulting from mechanical transformation or translation of
# the Source form.
#
# Permission for Use and Modification Without Distribution
# ********************************************************
#
# (1) You are permitted to use the Standard Version and create and use
# Modified Versions for any purpose without restriction, provided that you
# do not Distribute the Modified Version.
#
# Permissions for Redistribution of the Standard Version
# ******************************************************
#
# (2) You may Distribute verbatim copies of the Source form of the
# Standard Version of this Package in any medium without restriction,
# either gratis or for a Distributor Fee, provided that you duplicate all
# of the original copyright notices and associated disclaimers. At your
# discretion, such verbatim copies may or may not include a Compiled form
# of the Package.
#
# (3) You may apply any bug fixes, portability changes, and other
# modifications made available from the Copyright Holder. The resulting
# Package will still be considered the Standard Version, and as such will
# be subject to the Original License.
#
# Distribution of Modified Versions of the Package as Source
# **********************************************************
#
# (4) You may Distribute your Modified Version as Source (either gratis or
# for a Distributor Fee, and with or without a Compiled form of the
# Modified Version) provided that you clearly document how it differs from
# the Standard Version, including, but not limited to, documenting any
# non-standard features, executables, or modules, and provided that you do
# at least ONE of the following:
#
# (a) make the Modified Version available to the Copyright Holder of the
# Standard Version, under the Original License, so that the Copyright
# Holder may include your modifications in the Standard Version.
#
# (b) ensure that installation of your Modified Version does not prevent
# the user installing or running the Standard Version. In addition, the
# Modified Version must bear a name that is different from the name of the
# Standard Version.
#
# (c) allow anyone who receives a copy of the Modified Version to make the
# Source form of the Modified Version available to others under
#
# (i) the Original License or
#
# (ii) a license that permits the licensee to freely copy, modify and
# redistribute the Modified Version using the same licensing terms that
# apply to the copy that the licensee received, and requires that the
# Source form of the Modified Version, and of any works derived from it,
# be made freely available in that license fees are prohibited but
# Distributor Fees are allowed.
#
# Distribution of Compiled Forms of the Standard Version or Modified
# ******************************************************************
# Versions without the Source
# ***************************
#
# (5) You may Distribute Compiled forms of the Standard Version without
# the Source, provided that you include complete instructions on how to
# get the Source of the Standard Version. Such instructions must be valid
# at the time of your distribution. If these instructions, at any time
# while you are carrying out such distribution, become invalid, you must
# provide new instructions on demand or cease further distribution. If
# you provide valid instructions or cease distribution within thirty days
# after you become aware that the instructions are invalid, then you do
# not forfeit any of your rights under this license.
#
# (6) You may Distribute a Modified Version in Compiled form without the
# Source, provided that you comply with Section 4 with respect to the
# Source of the Modified Version.
#
# Aggregating or Linking the Package
# **********************************
#
# (7) You may aggregate the Package (either the Standard Version or
# Modified Version) with other packages and Distribute the resulting
# aggregation provided that you do not charge a licensing fee for the
# Package. Distributor Fees are permitted, and licensing fees for other
# components in the aggregation are permitted. The terms of this license
# apply to the use and Distribution of the Standard or Modified Versions
# as included in the aggregation.
#
# (8) You are permitted to link Modified and Standard Versions with other
# works, to embed the Package in a larger work of your own, or to build
# stand-alone binary or bytecode versions of applications that include the
# Package, and Distribute the result without restriction, provided the
# result does not expose a direct interface to the Package.
#
# Items That are Not Considered Part of a Modified Version
# ********************************************************
#
# (9) Works (including, but not limited to, modules and scripts) that
# merely extend or make use of the Package, do not, by themselves, cause
# the Package to be a Modified Version. In addition, such works are not
# considered parts of the Package itself, and are not subject to the terms
# of this license.
#
# General Provisions
# ******************
#
# (10) Any use, modification, and distribution of the Standard or Modified
# Versions is governed by this Artistic License. By using, modifying or
# distributing the Package, you accept this license. Do not use, modify,
# or distribute the Package, if you do not accept this license.
#
# (11) If your Modified Version has been derived from a Modified Version
# made by someone other than you, you are nevertheless required to ensure
# that your Modified Version complies with the requirements of this
# license.
#
# (12) This license does not grant you the right to use any trademark,
# service mark, tradename, or logo of the Copyright Holder.
#
# (13) This license includes the non-exclusive, worldwide, free-of-charge
# patent license to make, have made, use, offer to sell, sell, import and
# otherwise transfer the Package with respect to any patent claims
# licensable by the Copyright Holder that are necessarily infringed by the
# Package. If you institute patent litigation (including a cross-claim or
# counterclaim) against any party alleging that the Package constitutes
# direct or contributory patent infringement, then this Artistic License
# to you shall terminate on the date that such litigation is filed.
#
# (14) Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT
# HOLDER AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT
# PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT
# HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE
# OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#' @title Run k-medoid algorithm with varying k on similarity matrix
#' @usage scanK(SimiMatIn, quan=.95,cut=NULL, maxK=NULL,minSize=0, maxSize=200, fixK=NULL, rawscale=FALSE)
#' @param SimiMatIn gene-by-gene similarity matrix. It must be a square matrix with
#' row and column names, and the row names must be unique. Genes whose row contains
#' an infinite value (absolute row sum equal to Inf) are removed before any other step.
#' @param quan only genes whose max similarity score (over all other genes) is >= the
#' quan th quantile of these max scores will be considered in the cluster analyses.
#' Default is 0.95.
#' @param cut pre-defined cutoff. Genes whose max similarity score is >= cut will be
#' considered in cluster analyses. If cut is defined, quan will be ignored.
#' @param maxK max number of clusters to consider (scan). If maxK=NULL and cut=NULL, it
#' is calculated as ceiling(number of genes / 10 * (1 - quan)); if maxK=NULL and cut is
#' defined, it is calculated as ceiling(number of genes considered / 10). In all cases
#' the value is raised to at least 2 and capped at (number of genes considered - 1),
#' the largest number of clusters pam() accepts.
#' @param minSize,maxSize Only clusters with minSize<= cluster size <= maxSize are
#' reported in output.
#' @param fixK if fixK is specified, the k-medoids algorithm will be applied with fixK
#' clusters and maxK is ignored.
#' @param rawscale
#' Recall the input
#' is the similarity matrix (-log10(distance from the sine model)).
#' By default the k-medoids clustering is applied to the Euclidean distances between
#' the rows of (-Input), i.e. dist(-Input). If rawscale is defined as TRUE,
#' 10^(-Input), with its diagonal set to 0, is used in place of (-Input).
#' @return scanK() function runs k-medoid clustering with varying number of clusters (k).
#' The k is varied from 2 to maxK and the k with the largest average silhouette width
#' is selected. The input of scanK() function should be a similarity matrix.
#' scanK() function will cluster genes in gene pairs with high similarity score (the threshold can be
#' defined using parameter quan). To select the top genes, the function first calculates the max similarity
#' score for each gene, then selects the genes with high max score.
#'
#' The output object is a list with 5 elements:
#' membOut: members in each reported cluster. clusters are sorted (decreasing) by median
#' similarity score within cluster. This is an empty list if no cluster has a size
#' within minSize and maxSize;
#'
#' MedCor: median similarity score for each reported cluster;
#'
#' Mat: input similarity matrix (after removing genes with infinite values);
#'
#' filteredMat: similarity matrix, only showing the top genes used in clustering;
#'
#' Kcluster: cluster indicator of each top gene.
#' @examples aa <- sin(seq(0,1,.1))
#' bb <- sin(seq(0.5,1.5,.1))
#' cc <- sin(seq(0.9,1.9,.1))
#' tmp <- matrix(sin(rnorm(330)),ncol=11)
#' rownames(tmp) <- paste0("tmp",1:30)
#' Dat <- rbind(aa, bb, cc, tmp)
#' res1 <- OscopeSine(Dat)
#' res2 <- scanK(res1$SimiMat, quan=.8, maxK=5)
#' @author Ning Leng
scanK <- function(SimiMatIn, quan=.95, cut=NULL, maxK=NULL,minSize=0, maxSize=200,fixK=NULL,
	rawscale=FALSE){
  # ---- input validation (plain errors; no test-framework calls at runtime) ----
  if (is.null(rownames(SimiMatIn))) stop("Row names are not provided!")
  if (is.null(colnames(SimiMatIn))) stop("Column names are not provided!")
  if (anyDuplicated(rownames(SimiMatIn)) > 0) stop("Duplicated gene/isoform names!")
  if (!inherits(SimiMatIn, "matrix")) stop("SimiMatIn must be a matrix!")
  if (nrow(SimiMatIn) != ncol(SimiMatIn)) stop("SimiMatIn must be a square matrix!")

  # ---- remove genes whose row contains Inf ----
  # drop = FALSE keeps the matrix shape even if a single gene is left.
  WhichInf <- which(rowSums(abs(SimiMatIn)) == Inf)
  if (length(WhichInf) > 0) {
    SimiMatIn <- SimiMatIn[-WhichInf, -WhichInf, drop = FALSE]
  }
  nGene <- nrow(SimiMatIn)
  if (nGene < 2) {
    stop("At least two genes without infinite similarity scores are required!")
  }

  # ---- select top genes by their best similarity to any other gene ----
  cor_max <- vapply(
    seq_len(nGene),
    function(i) max(SimiMatIn[i, -i], na.rm = TRUE),
    numeric(1)
  )
  # An explicit cut overrides the quantile-based threshold.
  if (is.null(cut)) {
    QQ <- quantile(cor_max, quan, na.rm = TRUE)
  } else {
    QQ <- cut
  }
  if (!is.numeric(QQ)) stop("The similarity threshold (quan/cut) must be numeric!")
  message("gene pairs above this threshold are considered:")
  message(QQ)
  wcm <- which(cor_max >= QQ)
  sMatCor <- SimiMatIn[wcm, wcm, drop = FALSE]
  nTop <- nrow(sMatCor)
  if (nTop < 2) {
    stop("Fewer than two genes pass the threshold; consider lowering quan or cut!")
  }

  # ---- dissimilarity handed to pam() ----
  # Computed once: it does not depend on k. Note that dist() takes Euclidean
  # distances between the rows of the (transformed) similarity matrix.
  if (rawscale) {
    temp <- 10^(-sMatCor)
    diag(temp) <- 0
    distTop <- dist(temp)
  } else {
    distTop <- dist(-sMatCor)
  }

  if (is.null(fixK)) {
    # ---- scan k = 2..numC and keep the k with the best average silhouette ----
    if (nTop < 3) {
      stop("At least three genes must pass the threshold to scan over k; ",
           "consider lowering quan or cut, or specify fixK!")
    }
    numC <- maxK
    if (is.null(numC)) {
      if (is.null(cut)) {
        numC <- ceiling((nGene / 10) * (1 - quan))
      } else {
        # quan is ignored when cut is given, so base the default on the
        # number of genes actually considered.
        numC <- ceiling(nTop / 10)
      }
    }
    if (numC < 2) numC <- 2
    # pam() requires k <= n - 1.
    if (numC > nTop - 1) numC <- nTop - 1
    message("max number of clusters considered:", numC)
    TryList <- lapply(2:numC, function(k) pam(distTop, k = k))
    asw <- vapply(TryList, function(fit) fit$silinfo$avg.width, numeric(1))
    k.best <- which.max(asw)
    if (length(k.best) == 0) {
      stop("Average silhouette width could not be computed for any k!")
    }
    # TryList[[j]] holds the fit with k = j + 1.
    message("optimal number of clusters:", k.best + 1)
    Try <- TryList[[k.best]]
  } else {
    Try <- pam(distTop, k = fixK)
  }

  # ---- collect clusters within the requested size range ----
  memb <- Try$clustering
  a <- table(memb)
  a2 <- a[which(a >= minSize & a <= maxSize)]
  # Iterating over names(a2) (instead of 1:length(a2)) yields an empty list
  # rather than bogus empty clusters when no cluster qualifies.
  membOut <- lapply(names(a2), function(id) names(memb)[which(memb == id)])
  MeanCor <- vapply(membOut, function(g) median(SimiMatIn[g, g]), numeric(1))

  # ---- sort clusters by within-cluster median similarity (decreasing) ----
  Order <- order(MeanCor, decreasing = TRUE)
  membOutSort <- membOut[Order]
  MeanCorSort <- MeanCor[Order]
  # sprintf() returns character(0) for zero clusters; paste0() would not.
  outNames <- sprintf("cluster%d", seq_along(MeanCor))
  names(membOutSort) <- outNames
  names(MeanCorSort) <- outNames

  list(membOut=membOutSort, MedCor=MeanCorSort,
       Mat=SimiMatIn, filteredMat=sMatCor,
       Kcluster=memb)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.