# R/normalize2Reference.R In caret: Classification and Regression Training

```"normalize2Reference" <-
function (data, refData = NULL, ties = TRUE)
{
if (is.null(dim(data)))
{
numSamples <- 1
numProbes <- length(data)
data <- as.matrix(data)
} else {
numSamples <- dim(data)[2]
numProbes <- dim(data)[1]
}

dataOrder <- sortedData <- array(, dim(data))
if (ties) dataRanks <- dataOrder

nobs <- rep(numProbes, numSamples)

quantProbs <- (0:(numProbes - 1))/(numProbes - 1)

for (j in 1:numSamples)
{
## sort each column of data and get indicies and ranks
sortedSampleI <- sort(data[, j],
method = "quick",
index.return = TRUE)
if (ties) dataRanks[, j] <- rank(data[, j])
nobsj <- length(sortedSampleI\$x)
if (nobsj < numProbes) {
nobs[j] <- nobsj
isna <- is.na(data[, j])
sortedData[, j] <- approx((0:(nobsj - 1))/(nobsj - 1),
sortedSampleI\$x,
quantProbs,
ties = "ordered")\$y
dataOrder[!isna, j] <- ((1:numProbes)[!isna])[sortedSampleI\$ix]
}
else {
sortedData[, j] <- sortedSampleI\$x
dataOrder[, j] <- sortedSampleI\$ix
}
}
## at this point, we have the data sorted within each column and a key in dataOrder
## to revert back to the original input data: data[,j] == > sortedData[dataOrder[,j],j]

## use the row means from the data quantiles when no explicit reference is given.
if(is.null(refData))
{
refQuantile <- rowMeans(sortedData)
} else refQuantile <- sort(refData)

for (j in 1:numSamples) {
if (nobs[j] < numProbes)
{
isna <- is.na(data[, j])
if (ties)
## The approx does a linear approx on the line where
## x is the index scaled to [0, 1] and y is the mean
## for that index across all samples (or reference quants)
## It returns an approximated mean value
data[!isna, j] <- approx(quantProbs,
refQuantile,
(dataRanks[!isna, j] - 1)/(nobs[j] - 1),
ties = "ordered")\$y
else data[dataOrder[!isna, j], j] <- approx(quantProbs,
refQuantile,
(0:(nobs[j] - 1))/(nobs[j] - 1),
ties = "ordered")\$y
}
else {
if (ties)
data[, j] <- approx(quantProbs,
refQuantile,
(dataRanks[, j] - 1)/(numProbes - 1),
ties = "ordered")\$y
else data[dataOrder[, j], j] <- refQuantile
}
}
data
}
```

## Try the caret package in your browser

Any scripts or data that you put into this service are public.

caret documentation built on May 2, 2019, 5:47 p.m.