Nothing
yai <- function(x=NULL,y=NULL,data=NULL,k=1,noTrgs=FALSE,noRefs=FALSE,
nVec=NULL,pVal=.05,method="msn",ann=TRUE,mtry=NULL,ntree=500,
rfMode="buildClasses",bootstrap=FALSE,ppControl=NULL,
sampleVars=NULL,rfXsubsets=NULL)
{
# define functions used internally.
sumSqDiff=function(x,y) { d=x-y; sum(d*d) }
findFactors = get("findFactors",asNamespace("yaImpute"))
ftest.cor = function (p,q,N,cor)
{
s=min(p,q)
if (s==0) stop ("p and q must be > 0")
if (length(cor) < s) stop ("cor too short")
lamda=array(dim=s)
k=1:s
for (i in k) lamda[i]=prod(1-cor[i:s]^2)
r=(N-s-1)-((abs(p-q)+1)/2)
Ndf=(p-k+1)*(q-k+1)
u=(Ndf-2)/4
xx=((p-k+1)^2+(q-k+1)^2)-5
t=vector(mode="numeric",length=s)
for (i in k) if (xx[i]>0) t[i]=sqrt(((p-k[i]+1)^2*(q-k[i]+1)^2-4)/xx[i])
lamda.invt=lamda^(1/t)
Ddf=(r*t)-(2*u)
setNA = Ddf < 1 | Ndf < 1
firstNA = which(setNA == TRUE)
if (length(firstNA) == 0) firstNA=0
if (length(firstNA) > 1) firstNA=firstNA[1]
if (firstNA > 0) setNA[firstNA:length(setNA)] = TRUE
F=((1-lamda.invt)/lamda.invt)*(Ddf/Ndf)
F[setNA] = NA
pgF = F
firstNA = if (firstNA > 1) firstNA else length(F)
{
pgF[1:firstNA]=pf(F[1:firstNA],Ndf[1:firstNA],Ddf[1:firstNA],
lower.tail=FALSE)
pgF[setNA] = NA
}
list(F=F,pgF=pgF)
}
mymean = function(x)
{
if (is.null(ncol(x)))
{
ans = if (is.factor(x)) NA else mean(x)
}
else
{
ans=as.numeric(rep(NA,ncol(x)))
names(ans)=colnames(x)
for (i in 1:ncol(x)) if (!is.factor(x[,i])) ans[i]=mean(x[,i])
}
ans
}
mysd = function(x)
{
if (is.null(ncol(x)))
{
ans = if (is.factor(x)) NA else sd(x)
}
else
{
ans=as.numeric(rep(NA,ncol(x)))
names(ans)=colnames(x)
for (i in 1:ncol(x)) if (!is.factor(x[,i])) ans[i]=sd(x[,i])
}
ans
}
#===============================================
# ARGUMENT and DATA screening
methodSet=c("msn","msn2","msnPP","mahalanobis","ica","euclidean","gnn",
"randomForest","raw","random","gower")
if (!(method %in% methodSet))
stop (paste("method not one of:",paste(methodSet,collapse =", ")))
if (method == "gnn") # (GNN), make sure we have package vegan loaded
{
if (!requireNamespace ("vegan"))
{
stop("install vegan and try again")
# the purpose of this line of code is to suppress CRAN check notes
cca <- rda <- function (...) NULL
} else {
cca <- vegan::cca
rda <- vegan::rda
}
}
if (method == "ica") # (ica), make sure we have package fastICA loaded
{
if (!requireNamespace ("fastICA"))
{
stop("install fastica and try again")
# the purpose of this line of code is to suppress CRAN check notes
fastICA <- function (...) NULL
} else {
fastICA <- fastICA::fastICA
}
}
if (method == "randomForest") # make sure we have package randomForest loaded
{
if (!requireNamespace ("randomForest"))
{
stop("install randomForest and try again")
# the purpose of this line of code is to suppress CRAN check notes
randomForest <- function (...) NULL
} else {
randomForest <- randomForest::randomForest
}
}
if (method == "msnPP") # make sure we have package ccaPP loaded
{
if (!requireNamespace ("ccaPP"))
{
stop("install ccaPP and try again")
# the purpose of this line of code is to suppress CRAN check notes
fastMAD <- ccaGrid <- ccaProj <- function (...) NULL
} else {
fastMAD <- ccaPP::fastMAD
ccaGrid <- ccaPP::ccaGrid
ccaProj <- ccaPP::ccaProj
}
}
if (method == "gower") # make sure we have package gower loaded
{
if (!requireNamespace ("gower"))
{
stop("install gower and try again")
# the purpose of this line of code is to suppress CRAN check notes
gower_topn <- function (...) NULL
} else {
gower_topn <- gower::gower_topn
}
}
cl=match.call()
obsDropped=NULL
theFormula=NULL
yall=NULL
if (is.data.frame(x) | is.matrix(x))
{
if (mode(rownames(x)) != "character") rownames(x)=as.character(rownames(x))
xall=na.omit (as.data.frame(x))
if (nrow(xall) != nrow(x))
{
warning (nrow(x)-nrow(xall)," x observation(s) removed")
obsDropped=names(attributes(na.omit(x))$na.action)
}
if (!is.null(y))
{
if (is.null(dim(y)))
{
if (length(y) == nrow (x)) y=data.frame(y,row.names=rownames(x),
stringsAsFactors = TRUE)
else stop(paste0("when formulas are not used,",
" y must be a matrix, dataframe,",
" or a vector the same length of rows in x"))
}
if (is.matrix(y) | is.data.frame(y))
{
if (mode(rownames(y)) != "character") rownames(y)=as.character(rownames(y))
yall=na.omit(as.data.frame(y))
if (nrow(yall) != nrow(as.data.frame(y)))
{
warning (nrow(y)-nrow(yall)," y observation(s) removed")
obsDropped=union(obsDropped,names(attributes(na.omit(y))$na.action))
}
}
theFormula=NULL
}
}
else if (inherits(x,"formula"))
{
if (inherits(y,"formula")) yall=model.frame(y,data=data)
xall=model.frame(x,data=data)
obsDropped=setdiff(rownames(data),rownames(xall))
if (length(obsDropped)) warning (length(obsDropped)," observation(s) removed")
theFormula=list(x=x,y=y)
}
else stop ("x is missing or not a matrix or a dataframe")
if (is.null(yall) & (method %in% c("mahalanobis","ica",
"euclidean","randomForest","raw","gower")))
{
ydum=TRUE
yall=data.frame(ydummy=rep(1,nrow(xall)),row.names=rownames(xall))
}
else ydum=FALSE
if (is.null(yall)) stop("y is missing")
if (nrow(xall) == 0) stop ("no observations in x")
if (! (method %in% c("random","randomForest","gower")))
{
fy=0
if (!(method %in% c("mahalanobis","ica","euclidean","raw")))
fy=sum(findFactors(yall))
if (fy+sum(findFactors(xall)>0)>0)
stop("factors allowed only for methods randomForest, random, or gower")
}
refs=intersect(rownames(yall),rownames(xall))
if (length(refs) == 0) stop ("no reference observations.")
# if X variable subsets are used, make sure we are using method="randomForest"
if (!is.null(rfXsubsets) && method != "randomForest")
{
warning ("specification of rfXsubsets is ignored when method is not randomForest.")
rfXsubsets = NULL
}
if (!is.null(rfXsubsets))
{
vtok = match(unique(unlist(rfXsubsets)),names(xall))
if (any(is.na(vtok))) stop("one or more variables in rfXsubsets are not present in x.")
xall = xall[,vtok]
}
# if sampling variables, set up xRefs and yRefs accordingly
if (!is.null(sampleVars))
{
if (length(sampleVars) == 1 && is.null(names(sampleVars)))
sampleVars=rep(sampleVars,2)
names(sampleVars) = if (is.null(names(sampleVars))) c("X","Y") else
toupper(names(sampleVars))
nx = match("X",names(sampleVars))
ny = match("Y",names(sampleVars))
nx = if (!is.na(nx)) sampleVars[nx] else 0
ny = if (!is.na(ny)) sampleVars[ny] else 0
if (nx > 0)
{
nx = if (nx < 1.) max(1, ncol(xall)*nx) else min(nx, ncol(xall))
nxn = sample(1:ncol(xall),nx)
xall = xall[,nxn,drop=FALSE]
}
if (ny > 0)
{
ny = if (ny < 1.) max(1, ncol(yall)*ny) else min(ny, ncol(yall))
nyn = sample(1:ncol(yall),ny)
yall = yall[,nyn,drop=FALSE]
}
}
# if this is a bootstrap run, draw the sample.
if (bootstrap)
{
if (length (grep ("\\.[0-9]$",rownames(xall))) > 0)
stop ("rownames must not end with .[0-9] when bootstrap is true.")
bootsamp <- sort(sample(x=refs, size=length(refs), replace=TRUE))
yRefs=yall[bootsamp,,drop=FALSE]
xRefs=xall[bootsamp,,drop=FALSE]
refs = bootsamp
} else {
yRefs=yall[refs,,drop=FALSE]
xRefs=xall[refs,,drop=FALSE]
}
trgs=setdiff(rownames(xall),refs)
if (method == "gnn") # remove rows with zero sums or vegan will error off...
{
zero = apply(yRefs,1,sum) <= 0
ndrop=sum(zero)
if (ndrop>0)
{
warning (ndrop,paste0(" rows have y-variable row sums <= 0 were ",
"converted to target observations for method gnn"))
if (ndrop==nrow(yRefs)) stop ("all references were deleted")
obsDropped=union(obsDropped,refs[zero])
refs=refs[!zero]
yRefs=yall[refs,,drop=FALSE]
xRefs=xall[refs,,drop=FALSE]
trgs=setdiff(rownames(xall),refs)
}
# now remove columns with zero sums.
yDrop=apply(yRefs,2,sum) <= 0
if (sum(yDrop) > 0) warning ("y variables with zero sums: ",
paste(colnames(yRefs)[yDrop],collapse=","))
if (sum(yDrop) == ncol(yRefs)) stop("no y variables")
if (sum(yDrop) > 0) yRefs=yRefs[,!yDrop,drop=FALSE]
}
# initial scale values (maybe reduced by some methods).
if (method != "msnPP")
{
xScale=list(center=mymean(xRefs),scale=mysd(xRefs))
yScale=list(center=mymean(yRefs),scale=mysd(yRefs))
} else {
msn3cntr = function (x)
{
if (is.factor(x)) return (list(NULL,NULL))
cM = fastMAD(x) # uses fastMAD for scaling.
if (cM$MAD == 0)
{
cM$MAD = sd(x)
cM$center = mean(x)
}
cM
}
ce=matrix(unlist(apply(xRefs,2,msn3cntr)),ncol=2,byrow=TRUE)
rownames(ce) = colnames(xRefs)
xScale=list(center=ce[,1],scale=ce[,2])
ce=matrix(unlist(apply(yRefs,2,msn3cntr)),ncol=2,byrow=TRUE)
rownames(ce) = colnames(yRefs)
yScale=list(center=ce[,1],scale=ce[,2])
}
# for all methods except randomForest, random, raw, and gower
# variables with zero variance are dropped.
if (!(method %in% c("randomForest","random","raw","gower")))
{
xDrop=xScale$scale < 1e-10
if (sum(xDrop) > 0) warning ("x variables with zero variance: ",
paste(colnames(xRefs)[xDrop],collapse=","))
if (sum(xDrop) == ncol(xRefs)) stop("no x variables")
if (sum(xDrop) > 0)
{
xRefs=xRefs[,!xDrop,drop=FALSE]
xScale$scale=xScale$scale[!xDrop]
xScale$center=xScale$center[!xDrop]
}
}
else xDrop=NULL
# for most methods, xRefs must be a matrix.
if (! (method %in% c("randomForest","gower")) && !is.matrix(xRefs))
xRefs=as.matrix(xRefs)
# define these elements as NULL, some will be redefined below.
cancor=NULL
ftest=NULL
yDrop=NULL
projector=NULL
ccaVegan=NULL
ranForest=NULL
xTrgs=NULL
xcvRefs=NULL
xcvTrgs=NULL
ICA=NULL
#======= Define projector (if used), scale the variables, and project the
# reference space. Also project the target space if it is being used.
if (method %in% c("msn","msn2","msnPP")) # msn (all kinds)
{
yDrop=yScale$scale < 1e-10
if (sum(yDrop) > 0) warning ("y variables with zero variance: ",
paste(colnames(yRefs)[yDrop],collapse=","))
if (sum(yDrop) == ncol(yRefs)) stop("no y variables")
if (sum(yDrop) > 0)
{
yRefs=yRefs[,!yDrop,drop=FALSE]
yScale$scale=yScale$scale[!yDrop]
yScale$center=yScale$center[!yDrop]
}
xcvRefs=scale(xRefs,center=xScale$center,scale=xScale$scale)
ycvRefs=scale(yRefs,center=yScale$center,scale=yScale$scale)
if (method %in% c("msn","msn2")) # msn and msn2
{
cancor=cancor(xcvRefs,ycvRefs,xcenter = FALSE, ycenter = FALSE)
theCols = rownames(cancor$xcoef)
# scale the coefficients so that the cononical vectors will have unit variance.
cscal = 1/apply(xcvRefs[,theCols,drop=FALSE] %*%
cancor$xcoef[,1,drop=FALSE],2,sd)
cancor$ycoef = cancor$ycoef * cscal
cancor$xcoef = cancor$xcoef * cscal
} else { # msnPP
meth="spearman"
ppfunc=ccaGrid
if (!is.null(ppControl))
{
if (is.null(names(ppControl))) stop ("ppControl must have named strings.")
for (ppn in names(ppControl))
{
if (ppn == "method") meth = ppControl[[ppn]]
else if (ppn == "search") ppfunc =
if (ppControl[[ppn]] == "data" ||
ppControl[[ppn]] == "proj") ccaProj else ccaGrid
else stop ("ppControl named element ",ppn," is invalid")
}
}
# solve the canoncial correlation analysis via projection pursuit
cancor=ppfunc(xcvRefs,ycvRefs,method=meth,fallback=TRUE,
k=min(ncol(xcvRefs),ncol(ycvRefs)),nVec)
# save the results using names and attributes that correspond
# to the cancor results
cancor$ycoef = cancor$B
rownames(cancor$ycoef) = colnames(ycvRefs)
cancor$xcoef = cancor$A
rownames(cancor$xcoef) = colnames(xcvRefs)
theCols = rownames(cancor$xcoef)
cancor$A = NULL
cancor$B = NULL
class(cancor) = "list"
}
ftest=ftest.cor(p=nrow(cancor$ycoef),q=nrow(cancor$xcoef),
N=nrow(yRefs),cancor$cor)
if (is.null(nVec))
{
fcheck = ftest$pgF[!is.na(ftest$pgF)]
if (length(fcheck)> 0) nVec=max(1,length(fcheck)-sum(fcheck>pVal))
}
if (is.null(nVec)) nVec=1
nVec=min(nVec,length(cancor$cor))
nVec=max(nVec,1)
if (method %in% c("msn","msnPP")) projector =
cancor$xcoef[,1:nVec,drop=FALSE] %*%
diag(cancor$cor[1:nVec,drop=FALSE],nVec,nVec)
if (method == "msn2")
{
if (any (1/(1-cancor$cor[1:nVec,drop=FALSE]^2) <
.Machine$double.eps*10000)) nVec=1
if (any (1/(1-cancor$cor[1:nVec,drop=FALSE]^2) <
.Machine$double.eps*10000))
stop("msn2 can not be run likely because there are too few obesrvations.")
projector = cancor$xcoef[,1:nVec,drop=FALSE] %*%
diag(cancor$cor[1:nVec,drop=FALSE],nVec,nVec) %*%
diag(sqrt(1/(1-cancor$cor[1:nVec,drop=FALSE]^2)),nVec,nVec)
if (any (projector == -Inf | projector == Inf | is.na(projector) |
is.nan(projector))) stop ("msn2 can not be run.")
}
if (length(theCols)<ncol(xRefs))
{
#just get the names and create a logical
if (is.null(xDrop)) xDrop=xScale$center==0
remove=setdiff(colnames(xRefs),theCols)
xDrop[remove]=TRUE
warning ("x variables with colinearity: ",paste(remove,collapse=","))
xRefs=xRefs[,theCols,drop=FALSE]
xScale$center=xScale$center[theCols]
xScale$scale=xScale$scale[theCols]
xcvRefs=scale(xRefs,center=xScale$center,scale=xScale$scale)
}
xcvRefs=xcvRefs %*% projector
if (!noTrgs && length(trgs) > 0)
{
xTrgs=xall[trgs,theCols,drop=FALSE]
xcvTrgs=scale(xTrgs,center=xScale$center,scale=xScale$scale)
xcvTrgs=xcvTrgs %*% projector
}
}
else if (method == "mahalanobis")
{
xcvRefs=scale(xRefs,center=xScale$center,scale=xScale$scale)
qr = qr(xcvRefs) # maybe we are not at full rank
xcvRefs=xcvRefs[,qr$pivot[1:qr$rank],drop=FALSE]
projector = solve(chol(cov(xcvRefs)))
theCols = colnames(projector)
if (length(theCols)<ncol(xRefs))
{
#just get the names and create a logical
if (is.null(xDrop)) xDrop=xScale$center==0
remove=setdiff(colnames(xRefs),theCols)
xDrop[remove]=TRUE
warning ("x variables with colinearity: ",paste(remove,collapse=","))
xRefs=xRefs[,theCols,drop=FALSE]
xScale$center=xScale$center[theCols]
xScale$scale=xScale$scale[theCols]
}
nVec = ncol(projector) # same as qr$rank
xcvRefs=xcvRefs %*% projector
if (!noTrgs && length(trgs) > 0)
{
xTrgs=xall[trgs,theCols,drop=FALSE]
xcvTrgs=scale(xTrgs,center=xScale$center,scale=xScale$scale)
xcvTrgs=xcvTrgs %*% projector
}
}
else if (method == "ica")
{
xcvRefs=scale(xRefs,center=xScale$center,scale=xScale$scale)
qr = qr(xcvRefs) # maybe we are not at full rank
xcvRefs=xcvRefs[,qr$pivot[1:qr$rank],drop=FALSE]
a=fastICA(xcvRefs,ncol(xcvRefs),method="C",)
ICA=list(S=a$S,K=a$K,A=a$A,W=a$W)
projector = a$K %*% a$W
colnames(projector)=colnames(xcvRefs)
rownames(projector)=colnames(xcvRefs)
theCols = colnames(xcvRefs)
if (length(theCols)<ncol(xRefs))
{
if (is.null(xDrop)) xDrop=xScale$center==0
remove=setdiff(colnames(xRefs),theCols)
xDrop[remove]=TRUE
warning ("x variables with colinearity: ",paste(remove,collapse=","))
xRefs=xRefs[,theCols,drop=FALSE]
xScale$center=xScale$center[theCols]
xScale$scale=xScale$scale[theCols]
}
nVec = ncol(projector) # same as qr$rank
xcvRefs=xcvRefs %*% projector
if (!noTrgs && length(trgs) > 0)
{
xTrgs=xall[trgs,theCols,drop=FALSE]
xcvTrgs=scale(xTrgs,center=xScale$center,scale=xScale$scale)
xcvTrgs=xcvTrgs %*% projector
}
}
else if (method == "euclidean")
{
xcvRefs=scale(xRefs,center=xScale$center,scale=xScale$scale)
nVec = ncol(xRefs)
if (!noTrgs && length(trgs) > 0)
{
xTrgs=xall[trgs,!xDrop,drop=FALSE]
xcvTrgs=scale(xTrgs,center=xScale$center,scale=xScale$scale)
}
}
else if (method %in% c("raw"))
{
xcvRefs=xRefs
nVec = ncol(xRefs)
if (!noTrgs && length(trgs) > 0)
{
xTrgs=xall[trgs,,drop=FALSE]
xcvTrgs=as.matrix(xTrgs)
}
}
else if (method %in% c("gower"))
{
xcvRefs=xRefs
nVec = ncol(xRefs)
if (!noTrgs && length(trgs) > 0)
{
xTrgs=xall[trgs,,drop=FALSE]
xcvTrgs=xTrgs
}
ann=FALSE
}
else if (method == "gnn") # GNN
{
xcvRefs=scale(xRefs,center=xScale$center,scale=xScale$scale)
ccaVegan = cca(X=yRefs, Y=xcvRefs)
if (is.null(ccaVegan$CCA) |
ccaVegan$CCA$rank == 0)
{
warning (paste("cca() in package vegan failed, likely cause is",
"too few X or Y variables.\nAttemping rda(),",
"which is not well tested in the yaImpute package."))
ccaVegan = rda(X=yRefs, Y=xcvRefs)
}
# create a projected space for the reference observations
xcvRefs=predict(ccaVegan,type="lc",rank="full")
xcvRefs=xcvRefs %*% diag(sqrt(ccaVegan$CCA$eig/sum(ccaVegan$CCA$eig)))
# create a projected space for the unknowns (target observations)
if (!noTrgs && length(trgs) > 0)
{
xTrgs=xall[trgs,,drop=FALSE]
xcvTrgs=scale(xTrgs,center=xScale$center,scale=xScale$scale)
xcvTrgs=predict(ccaVegan,
newdata=as.data.frame(xcvTrgs),type="lc",rank="full")
xcvTrgs=xcvTrgs %*% diag(sqrt(ccaVegan$CCA$eig/sum(ccaVegan$CCA$eig)))
}
nVec = ncol(xcvRefs)
}
else if (method == "randomForest")
{
rfBuildClasses=NULL
xTrgs=xall[trgs,1,drop=FALSE]
rfVersion=packageDescription("randomForest")[["Version"]]
if (compareVersion(rfVersion,"4.5-22") < 0)
stop("Update your version of randomForest.")
if (is.null(ntree)) ntree=500
if (ydum)
{
if (!is.null(rfXsubsets)) warning(paste0("Specification of rfXsubsets",
" ignored when unsupervised randomForest is run."))
yone=NULL
mt = if (is.null(mtry)) max(floor(sqrt(ncol(xRefs))),1) else
min(mtry, ncol(xRefs))
ranForest=randomForest(x=xRefs,y=yone,proximity=FALSE,importance=TRUE,
keep.forest=TRUE,mtry=mt,ntree=ntree)
ranForest$type="yaImputeUnsupervised"
ranForest=list(unsupervised=ranForest)
}
else
{
ranForest=vector("list",ncol(yRefs))
if (length(ntree) < ncol(yRefs)) ntree=rep(max(50,
floor(ntree/ncol(yRefs))),ncol(yRefs))
for (i in 1:ncol(yRefs))
{
xN = names(xRefs)
if (!is.null(rfXsubsets))
{
yV = names(yRefs)[i]
if (!is.null(rfXsubsets[[yV]])) xN = intersect(rfXsubsets[[yV]],xN)
if (length(xN)==0) stop ("rfXsubsets is empty for Y-variable ",yV)
}
yone=yRefs[,i]
if (!is.factor(yone))
{
if (is.null(rfBuildClasses) && rfMode=="buildClasses")
rfBuildClasses=TRUE
if (is.null(rfBuildClasses))
{
# if the version is prior to 19
if (compareVersion(rfVersion,"4.5-19") < 0)
{
warning(paste0("yaImpute directly supports regression for ",
"continuous y's for randomForest version 4.5-19 and later."))
rfBuildClasses=TRUE
}
else rfBuildClasses=FALSE
}
if (rfBuildClasses)
{
yone=as.numeric(yone)
breaks <- pretty(yone, n = min(20,nclass.Sturges(yone)),min.n = 1)
div <- diff(breaks)[1]
yone=as.factor(floor(yone/div))
}
}
mt = if (is.null(mtry)) max(floor(sqrt(length(xN))), 1) else
min(mtry, length(xN))
ranForest[[i]]=randomForest(x=xRefs[,xN,FALSE],
y=yone,proximity=FALSE,importance=TRUE,keep.forest=TRUE,
mtry=mt,ntree=ntree[i])
}
names(ranForest)=colnames(yRefs)
}
nodes=NULL
for (i in 1:length(ranForest))
{
nodeset=attr(predict(ranForest[[i]],xall,
proximity=FALSE,nodes=TRUE),"nodes")
if (is.null(nodeset)) stop("randomForest did not return nodes")
colnames(nodeset)=paste(colnames(nodeset),i,sep=".")
nodes=if (is.null(nodes)) nodeset else cbind(nodes,nodeset)
}
if (bootstrap)
{
rn = sub("\\.[0-9]$","",rownames(xRefs))
refNodes = nodes[rn,]
rownames(refNodes) = rownames(xRefs)
} else refNodes = nodes[rownames(xRefs),]
INTrefNodes=as.integer(refNodes)
INTnrow=as.integer(nrow(xRefs))
INTncol=as.integer(ncol(nodes))
INTsort = INTrefNodes
dim(INTsort) = c(INTnrow,INTncol)
INTsort=apply(INTsort,2,function (x) sort(x,index.return = TRUE,
decreasing = FALSE)$ix-1)
attributes(INTsort)=NULL
INTsort = as.integer(INTsort)
attr(ranForest,"rfRefNodeSort") = list(INTrefNodes=INTrefNodes,
INTnrow=INTnrow, INTncol=INTncol, INTsort=INTsort)
}
else if (method == "random")
{
nVec = 1
ann=FALSE
xcvRefs=data.frame(random=runif(nrow(xRefs)),row.names=rownames(xRefs))
if (!noTrgs && length(trgs) > 0) xcvTrgs=
data.frame(random=runif(length(trgs)),row.names=trgs)
}
else # default
{
stop("no code for specified method")
}
# if bootstrap, then modify the reference list essentually removing the
# duplicate samples. For randomForest, correct processing is done above.
if (bootstrap && method != "randomForest")
{
unq = unique(bootsamp)
if (!is.null(xcvRefs)) xcvRefs = xcvRefs[unq,,drop=FALSE]
xRefs = xRefs[unq,,drop=FALSE]
}
k=min(k,nrow(xRefs))
# ======= find neighbors for TARGETS
if (noTrgs || length(trgs) == 0)
{
neiDstTrgs=NULL
neiIdsTrgs=NULL
}
else
{
neiDstTrgs=matrix(data=NA,nrow=length(trgs),ncol=k)
rownames(neiDstTrgs)=trgs
colnames(neiDstTrgs)=paste("Dst.k",1:k,sep="")
neiIdsTrgs=matrix(data="",nrow=length(trgs),ncol=k)
rownames(neiIdsTrgs)=trgs
colnames(neiIdsTrgs)=paste("Id.k",1:k,sep="")
if (method %in% c("msn","msn2","msnPP","mahalanobis",
"ica","euclidean","gnn","raw"))
{
if (ann)
{
ann.out=ann(xcvRefs, xcvTrgs, k, verbose=FALSE)$knnIndexDist
neiDstTrgs[TRUE]=sqrt(ann.out[,(k+1):ncol(ann.out)])
for (i in 1:k)
neiIdsTrgs[,i]=rownames(xcvRefs)[ann.out[,i]]
rownames(neiDstTrgs)=rownames(neiIdsTrgs)
}
else
{
for (row in rownames(xcvTrgs))
{
d=sqrt(sort(apply(xcvRefs,MARGIN=1,sumSqDiff,xcvTrgs[row,]))[1:k])
neiDstTrgs[row,]=d
neiIdsTrgs[row,]=names(d)
}
}
}
else if (method == "random")
{
l=k+1
d = matrix(unlist(lapply(xcvTrgs[[1]],function (x, xcv, l)
{
sort((xcv-x)^2,index.return=TRUE)$ix[2:l]
},xcvRefs[[1]],l)),nrow=nrow(xcvTrgs),ncol=k,byrow=TRUE)
for (ic in 1:ncol(d))
{
neiDstTrgs[,ic]=abs(xcvTrgs[,1]-xcvRefs[d[,ic],1])
neiIdsTrgs[,ic]=rownames(xcvRefs)[d[,ic]]
}
}
else if (method == "gower")
{
gow = gower_topn(x=xcvRefs,y=xcvTrgs,n=k)
for (i in 1:k)
{
neiDstTrgs[,i]=gow$distance[i,]
neiIdsTrgs[,i]=rownames(xcvTrgs)[gow$index[i,]]
}
}
else if (method == "randomForest")
{
prox=lapply(apply(nodes[rownames(xTrgs),,drop=FALSE],1,as.list),function (x)
{
prx=.Call("rfoneprox", INTrefNodes, INTsort, INTnrow, INTncol,
as.integer(x), vector("integer",INTnrow))
if (k > 1) px=sort(prx,index.return = TRUE, decreasing = TRUE)$ix[1:k]
else px=which.max(prx)
c(prx[px],px) # counts followed by pointers to references
})
for (i in 1:k)
{
neiDstTrgs[,i]=unlist(lapply(prox,function (x,i) (INTncol-x[i])/INTncol,i))
neiIdsTrgs[,i]=unlist(lapply(prox,function (x,i,k,Rnames)
Rnames[x[k+i]],i,k,rownames(xRefs)))
}
}
else # default
{
stop("no code for specified method")
}
}
# ======= find neighbors for REFERENCES
if (noRefs)
{
neiDstRefs=NULL
neiIdsRefs=NULL
}
else
{
neiDstRefs=matrix(data=NA,nrow=nrow(xRefs),ncol=k)
rownames(neiDstRefs)=rownames(xRefs)
colnames(neiDstRefs)=paste("Dst.k",1:k,sep="")
neiIdsRefs=matrix(data="",nrow=nrow(xRefs),ncol=k)
rownames(neiIdsRefs)=rownames(xRefs)
colnames(neiIdsRefs)=paste("Id.k",1:k,sep="")
l=k+1
if (method %in% c("msn","msn2","msnPP","mahalanobis","ica","euclidean",
"gnn","raw"))
{
if (ann & nrow(xcvRefs)> 0)
{
ann.out=ann(xcvRefs, xcvRefs, l, verbose=FALSE)$knnIndexDist
neiDstRefs[TRUE]=sqrt(ann.out[,(l+2):ncol(ann.out)])
# check for a second neighbor being the reference itself (can happen
# if the first and second neighbors both have distance of zero).
fix = ann.out[,1] != 1:nrow(ann.out)
if (any(fix)) ann.out[fix,2] = ann.out[fix,1]
for (i in 2:l)
{
neiIdsRefs[,(i-1)]=rownames(xcvRefs)[ann.out[,i]]
}
rownames(neiDstRefs)=rownames(neiIdsRefs)
}
else
{
for (row in 1:nrow(xcvRefs))
{
d=sort(apply(xcvRefs,MARGIN=1,sumSqDiff,xcvRefs[row,])[-row])[1:k]
neiDstRefs[row,]=d
neiIdsRefs[row,]=names(d)
}
}
}
else if (method == "gower")
{
gow = gower_topn(x=xcvRefs,y=xcvRefs,n=l)
for (i in 2:l)
{
neiDstRefs[,(i-1)]=gow$distance[i,]
neiIdsRefs[,(i-1)]=rownames(xcvRefs)[gow$index[i,]]
}
}
else if (method == "randomForest")
{
prox=lapply(apply(refNodes,1,as.list),function (x)
{
prx=.Call("rfoneprox", INTrefNodes, INTsort, INTnrow, INTncol,
as.integer(x), vector("integer",INTnrow))
if (k > 1) px=sort(prx,index.return = TRUE, decreasing = TRUE)$ix[2:l]
else
{
px=which.max(prx)
prx[px]=-1
px=which.max(prx)
}
c(prx[px],px) # counts followed by pointers to references
})
for (i in 1:k)
{
neiDstRefs[,i]=unlist(lapply(prox,function (x,i) (INTncol-x[i])/INTncol,i))
neiIdsRefs[,i]=unlist(lapply(prox,function (x,i,k,Rnames)
Rnames[x[k+i]],i,k,rownames(xRefs)))
}
}
else if (method == "random")
{
l=k+1
d = matrix(unlist(lapply(xcvRefs[[1]],function (x, xcv, l)
{
sort((xcv-x)^2,index.return=TRUE)$ix[2:l]
},xcvRefs[[1]],l)),nrow=nrow(xcvRefs),ncol=k,byrow=TRUE)
for (ic in 1:ncol(d))
{
neiDstRefs[,ic]=abs(xcvRefs[,1]-xcvRefs[d[,ic],1])
neiIdsRefs[,ic]=rownames(xcvRefs)[d[,ic]]
}
}
else # default
{
stop("no code for specified method")
}
}
xlevels=NULL
fa=findFactors(xRefs)
if (sum(fa)>0)
{
xlevels=vector(mode="list",length=sum(fa))
kk=0
for (i in 1:length(fa))
{
if (fa[i])
{
kk=kk+1
xlevels[[kk]]=levels(xRefs[,i])
names(xlevels)[[kk]]=names(xRefs)[i]
}
}
}
out=list(call=cl,yRefs=yRefs,xRefs=xRefs,obsDropped=obsDropped,yDrop=yDrop,
bootstrap= if (bootstrap) bootsamp else bootstrap,
xDrop=xDrop,trgRows=trgs,xall=xall,cancor=cancor,theFormula=theFormula,
ftest=ftest,yScale=yScale,xScale=xScale,ccaVegan=ccaVegan,ranForest=ranForest,
ICA=ICA,k=k,projector=projector,nVec=nVec,pVal=pVal,method=method,ann=ann,
xlevels=xlevels,neiDstTrgs=neiDstTrgs,neiIdsTrgs=neiIdsTrgs,
neiDstRefs=neiDstRefs,neiIdsRefs=neiIdsRefs,rfXsubsets=rfXsubsets)
class(out)="yai"
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.