Nothing
## mylevels() returns levels if given a factor, otherwise 0.
mylevels <- function(x) if (is.factor(x)) levels(x) else 0
snpRF <-
function(x.autosome=NULL,x.xchrom=NULL, xchrom.names=NULL, x.covar=NULL, y,
xtest.autosome=NULL,xtest.xchrom=NULL, xtest.covar=NULL,
ytest=NULL, ntree=500,
mtry=floor(sqrt(sum(c(ncol(x.autosome),ncol(x.xchrom)/2,ncol(x.covar))))),
replace=TRUE, classwt=NULL, cutoff, strata,
sampsize = if (replace) max(c(nrow(x.autosome),nrow(x.xchrom),nrow(x.covar))) else ceiling(.632*max(c(nrow(x.autosome),nrow(x.xchrom),nrow(x.covar)))),
nodesize = 1,
maxnodes=NULL,
importance=FALSE, localImp=FALSE,
proximity, oob.prox=proximity,
norm.votes=TRUE, do.trace=FALSE,
keep.forest=!is.null(y) && (is.null(xtest.autosome) & is.null(xtest.xchrom) & is.null(xtest.covar)),
keep.inbag=FALSE, ...) {
addclass <- is.null(y)
classRF <- addclass || is.factor(y)
if (!classRF && length(unique(y)) <= 5) {
warning("The response has five or fewer unique values. Are you sure you want to do regression?")
}
if (classRF && !addclass && length(unique(y)) < 2)
stop("Need at least two classes to do classification.")
### must have at least one of x.* ###
if(is.null(x.autosome) & is.null(x.xchrom) & is.null(x.covar)) stop("must have one of x.autosome, x.xchrom, or x.covar")
### force predictor data to be matrices ###
if(!is.null(x.autosome) & class(x.autosome)!="matrix") stop("x.autosome must be a matrix")
if(!is.null(x.xchrom) & class(x.xchrom)!="matrix") stop("x.xchrom must be a matrix")
if(!is.null(x.covar) & class(x.covar)!="matrix") stop("x.covar must be a matrix")
### make sure that x.xchrom are all 0's and 1's ###
if(!all(unique(as.vector(x.xchrom)) %in% c(0,1))) stop("x.xchrom must be a matrix of 0 & 1s")
### check to see whether the xchrom and autosome have same number of people ###
n<-as.numeric(as.character(unique(c(nrow(x.autosome),nrow(x.xchrom),nrow(x.covar)))))
if(length(n)>1) stop("different number of obs. for x objects")
p.xchrom<-ifelse(is.null(x.xchrom),0,ncol(x.xchrom))
p.autosome<-ifelse(is.null(x.autosome),0,ncol(x.autosome))
p.covar<-ifelse(is.null(x.covar),0,ncol(x.covar))
x.row.names <- list()
x.row.names[[1]]<-rownames(x.autosome)
x.row.names[[2]]<-rownames(x.xchrom)
x.row.names[[3]]<-rownames(x.covar)
x.row.names<-do.call(rbind,lapply(x.row.names,function(x) dimnames(x)[[2]]))
x.row.names<-x.row.names[!duplicated(x.row.names),]
if(!is.null(x.row.names) && dim(x.row.names)[1]>1) stop("row.names do not agree between x.*")
if(!is.null(x.autosome) && is.null(dimnames(x.autosome)[[2]])) dimnames(x.autosome)[[2]]<-paste("A",seq(1,p.autosome),sep="")
if(!is.null(x.xchrom) && is.null(xchrom.names)) { xchrom.names<-paste("X",seq(1,p.xchrom/2),sep="")
}else{ if(length(xchrom.names) != p.xchrom/2) stop("length(xchrom.names) != dim(x.xchrom)[2]/2")}
if(!is.null(x.covar) && is.null(dimnames(x.covar)[[2]])) dimnames(x.covar)[[2]]<-paste("C",seq(1,p.covar),sep="")
x.col.names <- c(dimnames(x.autosome)[[2]],dimnames(x.covar)[[2]],xchrom.names)
## overcome R's lazy evaluation:
keep.forest <- keep.forest
testdat <- !is.null(xtest.autosome) | !is.null(xtest.xchrom) | !is.null(xtest.covar)
if (testdat) {
if((!is.null(x.autosome) & is.null(xtest.autosome)) |
(!is.null(x.xchrom) & is.null(xtest.xchrom)) |
(!is.null(x.covar) & is.null(xtest.covar))) stop("each x object must have a corresponding xtest object")
if ((!is.null(xtest.autosome) && ncol(x.autosome) != ncol(xtest.autosome)) |
(!is.null(xtest.xchrom) && ncol(x.xchrom) != ncol(xtest.xchrom)) |
(!is.null(xtest.covar) && ncol(x.covar) != ncol(xtest.covar))) stop("x and xtest must have same number of columns")
ntest <- as.numeric(as.character(unique(c(nrow(xtest.autosome),nrow(xtest.xchrom),nrow(xtest.covar)))))
if(length(ntest)>1) stop("xtest datasets must have same number of observations")
xts.row.names <- list()
xts.row.names[[1]]<-rownames(xtest.autosome)
xts.row.names[[2]]<-rownames(xtest.xchrom)
xts.row.names[[3]]<-rownames(xtest.covar)
xts.row.names<-do.call(rbind,lapply(xts.row.names,function(x) dimnames(x)[[2]]))
xts.row.names<-xts.row.names[!duplicated(xts.row.names),]
if(!is.null(xts.row.names) && dim(xts.row.names)[1]>1) stop("row.names do not agree between xtest.*")
}
## Make sure mtry is in reasonable range.
if ((mtry < 1) || (mtry > (p.autosome + (p.xchrom/2) + p.covar)))
warning("invalid mtry: reset to within valid range")
mtry <- max(1, min((p.autosome + (p.xchrom/2) + p.covar), round(mtry)))
## Check for NAs.
if ( (!is.null(x.autosome) && any(is.na(x.autosome))) |
(!is.null(x.xchrom) && any(is.na(x.xchrom))) |
(!is.null(x.covar) && any(is.na(x.covar)))) stop("NA not permitted in genetic predictors")
if (testdat &&
((!is.null(xtest.autosome) && any(is.na(xtest.autosome))) |
(!is.null(xtest.xchrom) && any(is.na(xtest.xchrom))) |
(!is.null(xtest.covar) && any(is.na(xtest.covar))))) stop("NA not permitted in genetic xtest")
if (any(is.na(y))) stop("NA not permitted in response")
if (!is.null(ytest) && any(is.na(ytest))) stop("NA not permitted in ytest")
### forcing predictor variables to be matrices ###
ncat.xchrom <- rep(1, p.xchrom)
ncat.autosome <- rep(1, p.autosome+p.covar)
xlevels.xchrom <- as.list(rep(0, p.xchrom))
xlevels.autosome <- as.list(rep(0, p.autosome))
xlevels.covar <- as.list(rep(0, p.covar))
maxcat<-1
if (classRF) {
nclass <- length(levels(y))
## Check for empty classes:
if (any(table(y) == 0)) stop("Can't have empty classes in y.")
if (!is.null(ytest)) {
if (!is.factor(ytest)) stop("ytest must be a factor")
if (!all(levels(y) == levels(ytest)))
stop("y and ytest must have the same levels")
}
if (missing(cutoff)) {
cutoff <- rep(1 / nclass, nclass)
} else {
if (sum(cutoff) > 1 || sum(cutoff) < 0 || !all(cutoff > 0) ||
length(cutoff) != nclass) {
stop("Incorrect cutoff specified.")
}
if (!is.null(names(cutoff))) {
if (!all(names(cutoff) %in% levels(y))) {
stop("Wrong name(s) for cutoff")
}
cutoff <- cutoff[levels(y)]
}
}
if (!is.null(classwt)) {
if (length(classwt) != nclass)
stop("length of classwt not equal to number of classes")
## If classwt has names, match to class labels.
if (!is.null(names(classwt))) {
if (!all(names(classwt) %in% levels(y))) {
stop("Wrong name(s) for classwt")
}
classwt <- classwt[levels(y)]
}
if (any(classwt <= 0)) stop("classwt must be positive")
ipi <- 1
} else {
classwt <- rep(1, nclass)
ipi <- 0
}
} else addclass <- FALSE
if (missing(proximity)) proximity <- addclass
if (proximity) {
prox <- matrix(0.0, n, n)
proxts <- if (testdat) matrix(0, ntest, ntest + n) else double(1)
} else {
prox <- proxts <- double(1)
}
if (localImp) {
importance <- TRUE
impmat <- matrix(0, ((p.xchrom/2)+p.autosome+p.covar), n)
} else impmat <- double(1)
if (importance) {
#if (nPerm < 1) nPerm <- as.integer(1) else nPerm <- as.integer(nPerm)
if (classRF) {
impout <- matrix(0.0, ((p.xchrom/2)+p.autosome+p.covar), nclass + 2)
impSD <- matrix(0.0,((p.xchrom/2)+p.autosome+p.covar) , nclass + 1)
} else {
impout <- matrix(0.0, ((p.xchrom/2)+p.autosome+p.covar), 2)
impSD <- double(((p.xchrom/2)+p.autosome+p.covar))
names(impSD) <- x.col.names
}
} else {
impout <- double(((p.xchrom/2)+p.autosome+p.covar))
impSD <- double(1)
}
nsample <- if (addclass) 2 * n else n
Stratify <- length(sampsize) > 1
if ((!Stratify) && sampsize > n) stop("sampsize too large")
if (Stratify && (!classRF)) stop("sampsize should be of length one")
if (classRF) {
if (Stratify) {
if (missing(strata)) strata <- y
if (!is.factor(strata)) strata <- as.factor(strata)
nsum <- sum(sampsize)
if (length(sampsize) > nlevels(strata))
stop("sampsize has too many elements.")
if (any(sampsize <= 0) || nsum == 0)
stop("Bad sampsize specification")
## If sampsize has names, match to class labels.
if (!is.null(names(sampsize))) {
sampsize <- sampsize[levels(strata)]
}
if (any(sampsize > table(strata)))
stop("sampsize can not be larger than class frequency")
} else {
nsum <- sampsize
}
nrnodes <- 2 * trunc(nsum / nodesize) + 1
} else {
## For regression trees, need to do this to get maximal trees.
nrnodes <- 2 * trunc(sampsize/max(1, nodesize - 4)) + 1
}
if (!is.null(maxnodes)) {
## convert # of terminal nodes to total # of nodes
maxnodes <- 2 * maxnodes - 1
if (maxnodes > nrnodes) warning("maxnodes exceeds its max value.")
nrnodes <- min(c(nrnodes, max(c(maxnodes, 1))))
}
#### combine x.autosome and x.covar for simplicity, since no special care ###
#### needs to be taken for either of these sets of variables ###
x.autosome<-cbind(x.autosome,x.covar)
## Compiled code expects variables in rows and observations in columns.
## in case x.* is NULL set as 0, this will work because x.* will have dim 0 ##
## and will be skipped in rf.c ##
if(is.null(x.autosome)) {
x.autosome<-0
}else{
x.autosome <- t(x.autosome)
}
if(is.null(x.xchrom)) {
x.xchrom<-0
}else{
x.xchrom <- t(x.xchrom)
}
storage.mode(x.autosome) <- "double"
storage.mode(x.xchrom) <- "double"
if (testdat) {
#### combine xtest.autosome and xtest.covar for simplicity, as above for ###
#### x.autosome and x.covar ###
xtest.autosome<-cbind(xtest.autosome,xtest.covar)
if(is.null(xtest.autosome)) {
xtest.autosome<-0
p.xta<-0
}else{
p.xta<-dim(xtest.autosome)[2]
xtest.autosome <- t(xtest.autosome)
}
if(is.null(xtest.xchrom)) {
xtest.xchrom<-0
p.xtx<-0
}else{
p.xtx<-dim(xtest.xchrom)[2]
xtest.xchrom <- t(xtest.xchrom)
}
storage.mode(xtest.autosome) <- "double"
storage.mode(xtest.xchrom) <- "double"
if (is.null(ytest)) {
ytest <- labelts <- 0
} else {
labelts <- TRUE
}
} else {
xtest.autosome <- double(1)
p.xta<-0
xtest.xchrom <- double(1)
p.xtx<-0
ytest <- double(1)
ntest <- 1
labelts <- FALSE
}
nt <- if (keep.forest) ntree else 1
if (classRF) {
cwt <- classwt
threshold <- cutoff
error.test <- if (labelts) double((nclass+1) * ntree) else double(1)
rfout <- .C("classRF",
autosome=x.autosome,
xchrom=x.xchrom,
xdim = as.integer(c(p.autosome+p.covar,p.xchrom,n)),
y = as.integer(y),
nclass = as.integer(nclass),
ncat = as.integer(c(ncat.autosome,ncat.xchrom)),
maxcat = as.integer(maxcat),
sampsize = as.integer(sampsize),
strata = if (Stratify) as.integer(strata) else integer(1),
Options = as.integer(c(addclass,
importance,
localImp,
proximity,
oob.prox,
do.trace,
keep.forest,
replace,
Stratify,
keep.inbag)),
ntree = as.integer(ntree),
mtry = as.integer(mtry),
ipi = as.integer(ipi),
classwt = as.double(cwt),
cutoff = as.double(threshold),
nodesize = as.integer(nodesize),
outcl = integer(nsample),
counttr = integer(nclass * nsample),
prox = prox,
impout = impout,
impSD = impSD,
impmat = impmat,
nrnodes = as.integer(nrnodes),
ndbigtree = integer(ntree),
nodestatus = integer(nt * nrnodes),
bestvar = integer(nt * nrnodes),
treemap = integer(nt * 2 * nrnodes),
nodepred = integer(nt * nrnodes),
xbestsplit = double(nt * nrnodes),
errtr = double((nclass+1) * ntree),
testdat = as.integer(testdat),
xtsA = as.double(xtest.autosome),
xtsX = as.double(xtest.xchrom),
xtsDim = as.integer(c(p.xta,p.xtx)),
clts = as.integer(ytest),
nts = as.integer(ntest),
countts = double(nclass * ntest),
outclts = as.integer(numeric(ntest)),
labelts = as.integer(labelts),
proxts = proxts,
errts = error.test,
inbag = if (keep.inbag)
matrix(integer(n * ntree), n) else integer(n),
PACKAGE="snpRF")[-1]
if (keep.forest) {
## deal with the random forest outputs
max.nodes <- max(rfout$ndbigtree)
treemap <- aperm(array(rfout$treemap, dim = c(2, nrnodes, ntree)),
c(2, 1, 3))[1:max.nodes, , , drop=FALSE]
}
if (!addclass) {
## Turn the predicted class into a factor like y.
out.class <- factor(rfout$outcl, levels=1:nclass,
labels=levels(y))
names(out.class) <- x.row.names
con <- table(observed = y,
predicted = out.class)[levels(y), levels(y)]
con <- cbind(con, class.error = 1 - diag(con)/rowSums(con))
}
out.votes <- t(matrix(rfout$counttr, nclass, nsample))[1:n, ]
oob.times <- rowSums(out.votes)
if (norm.votes)
out.votes <- t(apply(out.votes, 1, function(x) x/sum(x)))
dimnames(out.votes) <- list(x.row.names, levels(y))
class(out.votes) <- c(class(out.votes), "votes")
if (testdat) {
out.class.ts <- factor(rfout$outclts, levels=1:nclass,
labels=levels(y))
names(out.class.ts) <- xts.row.names
out.votes.ts <- t(matrix(rfout$countts, nclass, ntest))
dimnames(out.votes.ts) <- list(xts.row.names, levels(y))
if (norm.votes)
out.votes.ts <- t(apply(out.votes.ts, 1,
function(x) x/sum(x)))
class(out.votes.ts) <- c(class(out.votes.ts), "votes")
if (labelts) {
testcon <- table(observed = ytest,
predicted = out.class.ts)[levels(y), levels(y)]
testcon <- cbind(testcon,
class.error = 1 - diag(testcon)/rowSums(testcon))
}
}
cl <- match.call()
cl[[1]] <- as.name("snpRF")
out <- list(call = cl,
type = if (addclass) "unsupervised" else "classification",
predicted = if (addclass) NULL else out.class,
err.rate = if (addclass) NULL else t(matrix(rfout$errtr,
nclass+1, ntree,
dimnames=list(c("OOB", levels(y)), NULL))),
confusion = if (addclass) NULL else con,
votes = out.votes,
oob.times = oob.times,
classes = levels(y),
importance = if (importance)
matrix(rfout$impout, (p.xchrom/2) + p.autosome + p.covar, nclass+2,
dimnames = list(x.col.names,
c(levels(y), "MeanDecreaseAccuracy",
"MeanDecreaseGini")))
else matrix(rfout$impout, ncol=1,
dimnames=list(x.col.names, "MeanDecreaseGini")),
importanceSD = if (importance)
matrix(rfout$impSD, (p.xchrom/2)+p.autosome+p.covar, nclass + 1,
dimnames = list(x.col.names,
c(levels(y), "MeanDecreaseAccuracy")))
else NULL,
localImportance = if (localImp)
matrix(rfout$impmat, (p.xchrom/2)+p.autosome+p.covar, n,
dimnames = list(x.col.names,x.row.names)) else NULL,
proximity = if (proximity) matrix(rfout$prox, n, n,
dimnames = list(x.row.names, x.row.names)) else NULL,
ntree = ntree,
mtry = mtry,
forest = if (!keep.forest) NULL else {
list(ndbigtree = rfout$ndbigtree,
nodestatus = matrix(rfout$nodestatus,
ncol = ntree)[1:max.nodes,, drop=FALSE],
bestvar = matrix(rfout$bestvar, ncol = ntree)[1:max.nodes,, drop=FALSE], ### this is after X markers are converted to 1 column
treemap = treemap,
nodepred = matrix(rfout$nodepred,
ncol = ntree)[1:max.nodes,, drop=FALSE],
xbestsplit = matrix(rfout$xbestsplit,
ncol = ntree)[1:max.nodes,, drop=FALSE],
pid = rfout$classwt, cutoff=cutoff, ncat=c(ncat.autosome,ncat.xchrom), ### this counts X markers twice
ncat.autosome=ncat.autosome,ncat.xchrom=ncat.xchrom, ### this counts X markers twice,
### and splits up autosomal+covar and X markers
maxcat = maxcat,
nrnodes = max.nodes, ntree = ntree,
nclass = nclass, xlevels=c(xlevels.autosome,xlevels.covar,xlevels.xchrom))
},
y = if (addclass) NULL else y,
test = if(!testdat) NULL else list(
predicted = out.class.ts,
err.rate = if (labelts) t(matrix(rfout$errts, nclass+1,
ntree,
dimnames=list(c("Test", levels(y)), NULL))) else NULL,
confusion = if (labelts) testcon else NULL,
votes = out.votes.ts,
proximity = if(proximity) matrix(rfout$proxts, nrow=ntest,
dimnames = list(xts.row.names, c(xts.row.names,
x.row.names))) else NULL),
inbag = if (keep.inbag) rfout$inbag else NULL)
} else {
stop("Regression trees not implemented")
out<-NULL
}
class(out) <- "snpRF"
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.