#######################################################
#######################################################
# estimateGamma
# This function estimates the "gamma" parameter in the variable
# slope normalization model for RPPAs
# INPUTS
# Xhat: A matrix with samples in the rows and proteins in the columns
# to be used. This matrix should have already been adjusted for
# for column effects (the column median should have already been subtracted off)
# method: The regression method that is used to estimate the "gammas". The default
# is "pca" which uses perpendicular regression. There is also a method
# that just uses regular least squares regression. This first estimates
# the ratio and then estimates the inverse, but yields similar results to
# perpendicular regression.
estimateGamma <- function(Xhat,method="pca") {
#####################
# use perpendicular regression
if(method=="pca") {
nCol <- ncol(Xhat)
gamma <- matrix(0,nrow=nCol,ncol=nCol)
means <- apply(Xhat,2,mean)
for (i in 1:(nCol-1)) {
for (j in (i+1):nCol) {
r <- cor(Xhat[,i],Xhat[,j],use="complete.obs")
a <- Xhat[,i]
n <- length(a)
tt <- r*sqrt((n-2)/(1-r^2))
chk <- pt(tt,n-2,lower.tail=F)
if (chk < .05) {
eig <- eigen(var(cbind(Xhat[,i],Xhat[,j]),na.rm=T))
tmp <- (-1)*eig$vectors[1,2]/eig$vectors[2,2]
gamma[i,j] <- tmp
}
}
}
gamma[gamma<=0] <- 1
upper <- upper.tri(gamma)
ind <- which(upper,arr.ind=T)
design <- matrix(0,ncol=nCol,nrow=nrow(ind))
for(i in 1:nrow(ind)) {
design[i,ind[i,1]] <- -1
design[i,ind[i,2]] <- 1
}
loggamma <- log(gamma[upper])
newrow <- rep((1/nCol),nCol)
nonsingular <- rbind(newrow,design)
lestimateMean <- qr.solve(nonsingular,c(0,loggamma))
estimate1 <- exp(lestimateMean)
val <- estimate1
}
####################
# use least squares
if(method!="pca") {
nCol <- ncol(Xhat)
gamma <- matrix(0,nrow=nCol,ncol=nCol)
means <- apply(Xhat,2,mean,na.rm=T)
for (i in 1:(nCol-1)) {
for (j in (i+1):nCol) {
gamma[i,j] <-
sum((Xhat[,i]-means[i])*(Xhat[,j]-means[j]),na.rm=T)/sum((Xhat[,i]-means[i])^2,na.rm=T)
}
}
gamma[gamma<0] <- 1
upper <- upper.tri(gamma)
ind <- which(upper,arr.ind=T)
design <- matrix(0,ncol=nCol,nrow=nrow(ind))
for(i in 1:nrow(ind)) {
design[i,ind[i,1]] <- -1
design[i,ind[i,2]] <- 1
}
loggamma <- log(gamma[upper])
newrow <- rep((1/nCol),nCol)
nonsingular <- rbind(newrow,design)
lestimateMean <- qr.solve(nonsingular,c(0,loggamma))
estimate1 <- exp(lestimateMean)
gamma <- matrix(0,nrow=nCol,ncol=nCol)
means <- apply(Xhat,2,mean,na.rm=T)
for (i in 1:(nCol-1)) {
for (j in (i+1):nCol) {
gamma[i,j] <-
sum((Xhat[,j]-means[j])*(Xhat[,i]-means[i]),na.rm=T)/sum((Xhat[,j]-means[j])^2,na.rm=T)
}
}
gamma[gamma<0] <- 1
upper <- upper.tri(gamma)
ind <- which(upper,arr.ind=T)
design <- matrix(0,ncol=nCol,nrow=nrow(ind))
for(i in 1:nrow(ind)) {
design[i,ind[i,1]] <- 1
design[i,ind[i,2]] <- -1
}
loggamma <- log(gamma[upper])
newrow <- rep((1/nCol),nCol)
nonsingular <- rbind(newrow,design)
lestimateMean <- qr.solve(nonsingular,c(0,loggamma))
estimate2 <- exp(lestimateMean)
val <- (estimate1 + estimate2)/2
}
val
}
#######################################################
#######################################################
# pair score function
# This function computes the percent inconsistant when clustering
# the replicates separately.
#
# INPUTS
# data1: A matrix with samples in the rows and proteins in the columns
# to be clustered (replicate 1).
# data2: A matrix similar to data1 that will also be clustered (replicate 2)
# distmet: The distance metric to be used. The default is pearson correlation coefficient
# cmet: The linkage method. Default is average linkage
# k: Number of groups. Default is 8
pairscore <- function(data1,data2,distmet="pearson",cmet="average",k=8) {
antibody2 <- dimnames(data2)[[2]]
antibody1 <- dimnames(data1)[[2]]
if (sum(antibody1 != antibody2) != 0) { stop("Names don't match") }
hc1 <- hclust(distanceMatrix(data1, distmet), cmet)
cutter1 <- cutree(hc1,k=k)
names(cutter1) <- antibody1
sc1 <- matrix(NA,ncol=length(antibody1),nrow=length(antibody1))
for (i in 1:(length(antibody1)-1)) {
for (j in (i+1):length(antibody1)) {
sc1[i,j] <- (cutter1[antibody1[i]]==cutter1[antibody1[j]])+0
}
}
hc2 <- hclust(distanceMatrix(data2, distmet), cmet)
cutter2 <- cutree(hc2,k=k)
names(cutter2) <- antibody2
sc2 <- matrix(NA,ncol=length(antibody2),nrow=length(antibody2))
for (i in 1:(length(antibody2)-1)) {
for (j in (i+1):length(antibody2)) {
sc2[i,j] <- (cutter2[antibody2[i]]==cutter2[antibody2[j]])+0
}
}
sc1 <- sc1[upper.tri(sc1)]
sc2 <- sc2[upper.tri(sc2)]
tmp <- abs(sc1-sc2)
sum(tmp)/length(tmp)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.