# R/00-pairedStat.R In NewmanOmics: Extending the Newman Studentized Range Statistic to Transcriptomics

#### Documented in pairedStat

```setClass("NewmanPaired",
slots = c(
nu.statistics = "matrix",
p.values = "matrix",
pairedMean = "matrix",
difference = "matrix",
smoothSD = "matrix")
)

validNewmanPair <- function(object) {
all((dim(object@nu.statistics) == dim(object@p.values))
& (dim(object@nu.statistics) == dim(object@pairedMean))
& (dim(object@nu.statistics) == dim(object@difference))
& (dim(object@nu.statistics) == dim(object@smoothSD)))
}
setValidity("NewmanPaired", validNewmanPair)

setMethod("[", signature = "NewmanPaired", function(x, i, j, ..., drop=FALSE) {
new("NewmanPaired",
nu.statistics = x@nu.statistics[i,j, drop=FALSE],
p.values = x@p.values[i,j, drop=FALSE],
pairedMean = x@pairedMean[i,j, drop=FALSE],
difference = x@difference[i,j, drop=FALSE],
smoothSD = x@smoothSD[i,j, drop=FALSE])
})

setMethod("dim", signature = "NewmanPaired", function(x) {
dim(x@nu.statistics)
})

setMethod("plot", signature = c("NewmanPaired", "missing"),
function(x, y, high=0.99, low=0.01, colset=c("red", "blue", "orange"), ...) {
if (dim(x)[2] > 1) {
warning("Multiple pairs in 'x'; only plotting the first one.")
x <- x[,1]
}
bigp <- x@p.values > 0.99
smallp <- x@p.values < 0.01
plot(x@pairedMean, x@difference, xlab="Mean log expression", ylab="Difference in log expression")
points(x@pairedMean[smallp], x@difference[smallp], col=colset[1], pch=16)
points(x@pairedMean[bigp], x@difference[bigp], col=colset[2], pch=16)
points(x@pairedMean, x@smoothSD, col=colset[3])
points(x@pairedMean, -x@smoothSD, col=colset[3])
legend("topleft",
c(paste("P <", round(low, 3)),
paste("P >", round(high, 3)),
"Smoothed SD"),
col=colset, pch=16)
invisible(x)
})

setMethod("hist", signature = "NewmanPaired", function(x, breaks=101, xlab="P-value", ...) {
if (dim(x)[2] > 1) {
warning("Multiple pairs in 'x'; only showing the first one.")
x <- x[,1]
}
hist(x@p.values, breaks=breaks, xlab=xlab, ...)
})

pairedStat <- function(baseData, perturbedData = NULL, pairing = NULL){

if (is.list(baseData)) {
x <- baseData
baseData <- do.call(cbind, lapply(x, function(entry) {entry[,1]}))
perturbedData <- do.call(cbind, lapply(x, function(entry) {entry[,2]}))
rm(x)
} else if (is.null(perturbedData)) {
if (is.null(pairing)) {
stop("You must supply at least one of 'perturbedData' or 'pairing'.")
}
pos <- which(pairing > 0)
pos <- pos[order(pairing[pos])]
neg <- which(pairing < 0)
neg <- neg[order(pairing[order(neg)])]
x <- baseData
baseData <- x[,neg]
perturbedData <- x[,pos]
rm(x, pos, neg)
}

## KRC: Do we need to check that the two matrices are the same size?
## Or just let the first computation throw its own error?

## Matrix computation of mean of two things
pairedMean <- (baseData + perturbedData) / 2
## Similar computation for SD of two things.
pooledSD <- abs(baseData - perturbedData) / sqrt(2)
## For each column, perform loess fit
n <- dim(baseData)[1]
s <- dim(baseData)[2]
smoothSD <- matrix(NA, n, s) # set aside storage
for (i in 1:s) {
l.mod <- loess(pooledSD[ ,i] ~ pairedMean[ ,i])
smoothSD[ ,i] <- predict(l.mod)
}

## compute the matrix of nu-statistics
## KRC: Why is there an absolute value?
matNu <- abs(baseData - perturbedData) / smoothSD

## empirical p-values via simulation
m <- mean(matNu)
sd <- sd(matNu)
randNu <- randNuGen(m, sd)
pValsPaired <- nu2PValPaired(matNu, as.vector(randNu))

## KRC: should we make this a proper object, or just leave it a list?
new("NewmanPaired",
nu.statistics = matNu,
p.values = pValsPaired,
pairedMean = pairedMean,
difference = perturbedData - baseData,
smoothSD = smoothSD)
}

### Generating 1 million Nu values based on the overall mean and std deviation
### of the Nu values obtained from the paired statistic. This will later be
### used to estimate the p-values.
randNuGen <- function(mu=0, sigma=1) {
## magic numbers:  ngenes = 10000, ntimes = 100
A <- matrix(rnorm(10000*100, mu, sigma), ncol=100)
B <- matrix(rnorm(10000*100, mu, sigma), ncol=100)
sdest <- mean( abs(A-B)/sqrt(2) )
abs(A-B)/sdest
}

### originally written by Chao Liu on stackoverflow at
### https://stackoverflow.com/questions/20133344/find-closest-value-in-a-vector-with-binary-search
NearestValueSearch <- function(x, w){
## A simple binary search algorithm
## Assume the w vector is sorted so we can use binary search
left <- 1
right <- length(w)
while(right - left > 1){
middle <- floor((left + right) / 2)
if(x < w[middle]){
right <- middle
}
else{
left <- middle
}
}
if(abs(x - w[right]) < abs(x - w[left])){
return(right)
}
else{
return(left)
}
}

nu2PValPaired <- function(nuMatrix, vec){
vec <- sort(vec)
MatP <- matrix(sapply(nuMatrix, function(x) {
1 - NearestValueSearch(x, vec)/length(vec)
}), nrow(nuMatrix), ncol(nuMatrix))
return(MatP)
}
```

## Try the NewmanOmics package in your browser

Any scripts or data that you put into this service are public.

NewmanOmics documentation built on June 13, 2018, 3:01 a.m.