# R/subfuns.R In SIS: Sure Independence Screening

#### Defines functions calculate.nsisint.size.kintensureobtain.newixobtain.ix0mgmargcoefgetdfrepmatloglik

```loglik = function(X, y, beta, family) {
K = dim(beta)[2]
link = cbind(1, X) %*% beta
yrep = repmat(y, 1, K)
if (family == "gaussian")
if (family == "poisson")
if (family == "binomial")
}

repmat = function(X, m, n) {
## R equivalent of repmat (matlab)
X = as.matrix(X)
mx = dim(X)[1]
nx = dim(X)[2]
matrix(t(matrix(X, mx, nx * n)), mx * m, nx * n, byrow = T)
}

getdf = function(coef.beta) {
apply(abs(coef.beta) > 1e-10, 2, sum)
}

margcoef <- function(x, y, condind = NULL, family, null.model = FALSE, iterind) {
n = dim(x)[1]
p = dim(x)[2]
ones = rep(1, n)
candind = setdiff(1:p, condind)
if (iterind == 0) {
if (family == "cox")
margcoef = abs(cor(x, y[, 1])) else margcoef = abs(cor(x, y))
} else {
if (null.model == TRUE) {
if (is.null(condind) == TRUE) {
x = x[sample(1:n), ]
}
if (is.null(condind) == FALSE) {
x[, candind] = x[sample(1:n), candind]
}
}
margcoef = abs(sapply(candind, mg, x, y, ones, family, condind))
}
return(margcoef)
}

mg <- function(index, x = x, y = y, ones = ones, family = family, condind = condind) {
margfit = switch(family, gaussian = coef(glm.fit(cbind(ones, x[, index], x[, condind]), y, family = gaussian()))[2],
binomial = coef(glm.fit(cbind(ones, x[, index], x[, condind]), y, family = binomial()))[2], poisson = coef(glm.fit(cbind(ones,
x[, index], x[, condind]), y, family = poisson()))[2], cox = coef(coxph(y ~ cbind(x[, index], x[,
condind])))[1])
}

obtain.ix0 <- function(x, y, s1, s2, family, nsis, iter, varISIS, perm, q, greedy, greedy.size, iterind) {
if (iter == FALSE) {
margcoef = margcoef(x, y, family = family, null.model = FALSE, iterind = iterind)
rankcoef = sort(margcoef, decreasing = TRUE, index.return = TRUE)
ix0 = rankcoef\$ix[1:nsis]
} else {
if (varISIS == "vanilla") {
margcoef = margcoef(x, y, family = family, null.model = FALSE, iterind = iterind)
rankcoef = sort(margcoef, decreasing = TRUE, index.return = TRUE)
if (perm == FALSE)
ix0 = rankcoef\$ix[1:floor((2/3) * nsis)] else {
count = 0
repeat {
count = count + 1
randcoef = margcoef(x, y, family = family, null.model = TRUE, iterind = iterind)
if (length(which(margcoef >= quantile(randcoef, q))) > 0)
break
if(count > 10)
break
}
if (greedy == FALSE) {
if (length(which(margcoef >= quantile(randcoef, q))) >= 2) {
length1 = length(which(margcoef >= quantile(randcoef, q)))
above.thresh = rankcoef\$ix[1:length1]
ix0 = rankcoef\$ix[1:floor((2/3) * nsis)]
ix0 = sort(intersect(ix0, above.thresh))
} else ix0 = rankcoef\$ix[1:2]
} else {
if (greedy.size == 1)
ix0 = rankcoef\$ix[1:2] else ix0 = rankcoef\$ix[1:greedy.size]
}
}
} else {
if(family == 'cox'){
margcoef1 = margcoef(x[s1, ], y[s1,], family = family, null.model = FALSE, iterind = iterind)
margcoef2 = margcoef(x[s2, ], y[s2,], family = family, null.model = FALSE, iterind = iterind)
} else{
margcoef1 = margcoef(x[s1, ], y[s1], family = family, null.model = FALSE, iterind = iterind)
margcoef2 = margcoef(x[s2, ], y[s2], family = family, null.model = FALSE, iterind = iterind)

}

rankcoef1 = sort(margcoef1, decreasing = TRUE, index.return = TRUE)
rankcoef2 = sort(margcoef2, decreasing = TRUE, index.return = TRUE)
if (perm == FALSE) {
if (varISIS == "aggr") {
ix01 = rankcoef1\$ix[1:floor((2/3) * nsis)]
ix02 = rankcoef2\$ix[1:floor((2/3) * nsis)]
ix0 = sort(intersect(ix01, ix02))
if (length(ix0) <= 1)
ix0 = int.size.k(rankcoef1\$ix, rankcoef2\$ix, 2)
}
if (varISIS == "cons") {
iensure = intensure(floor((2/3) * nsis), l1 = rankcoef1\$ix, l2 = rankcoef2\$ix, k = floor((2/3) * nsis))
ix01 = rankcoef1\$ix[1:iensure]
ix02 = rankcoef2\$ix[1:iensure]
ix0 = sort(intersect(ix01, ix02))
}
} else {
count = 0
repeat {
count  = count + 1
randcoef1 = margcoef(x[s1, ], y[s1], family = family, null.model = TRUE, iterind = iterind)
randcoef2 = margcoef(x[s2, ], y[s2], family = family, null.model = TRUE, iterind = iterind)
if (length(which(margcoef1 >= quantile(randcoef1, q))) > 0 && length(which(margcoef2 >= quantile(randcoef2,
q))) > 0)
break
if(count > 10) break
}
if (greedy == FALSE) {
length1 = length(which(margcoef1 >= quantile(randcoef1, q)))
length2 = length(which(margcoef2 >= quantile(randcoef2, q)))
above.thresh.1 = rankcoef1\$ix[1:length1]
above.thresh.2 = rankcoef2\$ix[1:length2]
ix01 = rankcoef1\$ix[1:floor((2/3) * nsis)]
ix02 = rankcoef2\$ix[1:floor((2/3) * nsis)]
ix01 = sort(intersect(ix01, above.thresh.1))
ix02 = sort(intersect(ix02, above.thresh.2))
ix0 = sort(intersect(ix01, ix02))
if (length(ix0) <= 1)
ix0 = int.size.k(rankcoef1\$ix, rankcoef2\$ix, 2)
} else {
if (greedy.size == 1)
ix0 = int.size.k(rankcoef1\$ix, rankcoef2\$ix, 2) else ix0 = int.size.k(rankcoef1\$ix, rankcoef2\$ix, greedy.size)
}
}
}
}
return(ix0)
}

obtain.newix <- function(x, y, ix1, candind, s1, s2, family, pleft, varISIS, perm, q, greedy, greedy.size, iterind) {
if (varISIS == "vanilla") {
margcoef = margcoef(x, y, ix1, family = family, null.model = FALSE, iterind = iterind)
rankcoef = sort(margcoef, decreasing = TRUE, index.return = TRUE)
if (perm == FALSE) {
if (pleft > 0)
newix = candind[rankcoef\$ix[1:pleft]] else newix = NULL
} else {
randcoef = margcoef(x, y, ix1, family = family, null.model = TRUE, iterind = iterind)
if (length(which(margcoef >= quantile(randcoef, q))) > 0) {
if (greedy == FALSE) {
length1 = length(which(margcoef >= quantile(randcoef, q)))
above.thresh = candind[rankcoef\$ix[1:length1]]
newix = candind[rankcoef\$ix[1:pleft]]
newix = sort(intersect(newix, above.thresh))
} else newix = candind[rankcoef\$ix[1:greedy.size]]
} else newix = NULL
}
} else {
margcoef1 = margcoef(x[s1, ], y[s1], ix1, family = family, null.model = FALSE, iterind = iterind)
margcoef2 = margcoef(x[s2, ], y[s2], ix1, family = family, null.model = FALSE, iterind = iterind)
rankcoef1 = sort(margcoef1, decreasing = TRUE, index.return = TRUE)
rankcoef2 = sort(margcoef2, decreasing = TRUE, index.return = TRUE)
if (perm == FALSE) {
if (pleft > 0) {
if (varISIS == "aggr") {
newix1 = candind[rankcoef1\$ix[1:pleft]]
newix2 = candind[rankcoef2\$ix[1:pleft]]
newix = sort(intersect(newix1, newix2))
}
if (varISIS == "cons") {
iensure = intensure(pleft, l1 = rankcoef1\$ix, l2 = rankcoef2\$ix, k = pleft)
newix1 = candind[rankcoef1\$ix[1:iensure]]
newix2 = candind[rankcoef2\$ix[1:iensure]]
newix = sort(intersect(newix1, newix2))
}
} else newix = NULL
} else {
randcoef1 = margcoef(x[s1, ], y[s1], ix1, family = family, null.model = TRUE, iterind = iterind)
randcoef2 = margcoef(x[s2, ], y[s2], ix1, family = family, null.model = TRUE, iterind = iterind)
if (length(which(margcoef1 >= quantile(randcoef1, q))) > 0 && length(which(margcoef2 >= quantile(randcoef2,                                                                          q))) > 0) {
if (greedy == FALSE) {
length1 = length(which(margcoef1 >= quantile(randcoef1, q)))
length2 = length(which(margcoef2 >= quantile(randcoef2, q)))
above.thresh.1 = candind[rankcoef1\$ix[1:length1]]
above.thresh.2 = candind[rankcoef2\$ix[1:length2]]
newix1 = candind[rankcoef1\$ix[1:pleft]]
newix2 = candind[rankcoef2\$ix[1:pleft]]
newix1 = sort(intersect(newix1, above.thresh.1))
newix2 = sort(intersect(newix2, above.thresh.2))
newix = sort(intersect(newix1, newix2))
} else {
length1 = length(which(margcoef1 >= quantile(randcoef1, q)))
length2 = length(which(margcoef2 >= quantile(randcoef2, q)))
newix1 = candind[rankcoef1\$ix[1:length1]]
newix2 = candind[rankcoef2\$ix[1:length2]]
iensure = intensure(greedy.size, l1 = newix1, l2 = newix2, k = greedy.size)
if(is.null(iensure)) newix = NULL
else newix = sort(intersect(newix1[1:iensure], newix2[1:iensure]))
}
} else newix = NULL
}
}
return(newix)
}

intensure <- function(i, l1, l2, k) {
for(j in i:length(l1)){
if (length(intersect(l1[1:j], l2[1:j])) >= k)
return(j)
}
# if (length(intersect(l1[1:i], l2[1:i])) >= k)
#    return(i) else return(intensure(i + 1, l1, l2, k))
}

int.size.k <- function(l1, l2, k) {
iensure = intensure(k, l1 = l1, l2 = l2, k = k)
ix01 = l1[1:iensure]
ix02 = l2[1:iensure]
ix0 = sort(intersect(ix01, ix02))
return(ix0)
}

calculate.nsis <- function(family, varISIS, n, p) {
if (varISIS == "aggr")
nsis = floor(n/log(n)) else {
if (family == "gaussian") {
nsis = floor(n/log(n))
}
if (family == "binomial") {
nsis = floor(n/(4 * log(n)))
}
if (family == "poisson") {
nsis = floor(n/(2 * log(n)))
}
if (family == "cox") {
nsis = floor(n/(4 * log(n)))
}
}
if (p < n)
nsis = p
return(nsis)
}
```

## Try the SIS package in your browser

Any scripts or data that you put into this service are public.

SIS documentation built on Feb. 14, 2018, 1:01 a.m.