Nothing
### This file contains the functions for determining the phase two pool sizes
### There are two ways to determine pool sizes for a given k and max.nrs:
### Fast way: does not consider family memberships
### Slow way: considers family memberships,
### and increases the pool sizes accordingly.
### 1. getMaxNforK(k,max.num.rs) - not exported
### 2. extractValidIM.OC(rm,fm,im)
### 3. extractValidIM(rm,fm,im,n.splits)
### 4. getOneExhaustiveIM(rm,k)
### 5. getPSforK(rm.ordered,k,max.nrs.ee,max.compute)
### 6. getPoolSizes(rm.ordered,k.max,max.nrs.ee,max.compute){
#getPoolSizeForK(rm.ordered,max.nrs,k,fastWay)
#(rm.ordered,k.max,max.num.rs.ee)
### 1.
### This function takes as input a k value and an upper bound, max.num.rs,
### and it findes the largest N such that (N choose K) < max.num.rs
getMaxNforK <- function(k,max.num.rs){
if(missing(max.num.rs)) max.num.rs <- 20000
go <- 1
N <- k+1
while(go){
temp <- choose(N,k)
if(temp > max.num.rs) go <- 0
N <- N + 1
}
max.N <- N-2 ### N-2 was last one under max.num.rs
return(max.N)
}
### 2.
extractValidIM.OC <- function(rm,fm,im){
fam.rules <- which(rowSums(fm)>1)
fam.rules <- intersect(fam.rules,im) ### ignore rules that don't appear in im
for(rule in fam.rules){
rule.fams <- setdiff(which(fm[rule,]==1),rule)
rule.fams <- setdiff(rule.fams,c(1:rule))
if(length(rule.fams)>0){
rule.idc <- which(apply(im,1,function(x)rule %in% x)==TRUE)
im.rule <- im[rule.idc,]
if(length(rule.idc)==1) im.rule <- t(as.matrix(im.rule))
im.nonrule <- im[setdiff(1:nrow(im),rule.idc),]
bad.idc <- which(apply(im.rule,1,function(x)length(intersect(rule.fams,x)))>0)
new.im.rule <- im.rule[setdiff(1:nrow(im.rule),bad.idc),]
new.im <- rbind(im.nonrule,new.im.rule)
im <- new.im
}
}
return(im)
}
### 3.
#' @importFrom foreach getDoParWorkers foreach %dopar%
extractValidIM <- function(rm,fm,im,n.splits){
im.cur <- NULL
n.cores <- getDoParWorkers()
if(missing(n.splits)) n.splits <- min(n.cores,nrow(im))
im.list <- splitMatIntoList(im,n.splits)
new.im <- foreach(im.cur = im.list,.combine = 'rbind',.multicombine = TRUE,.export=ls(envir=globalenv())) %dopar% {
extractValidIM.OC(rm,fm,im.cur)
}
return(new.im)
}
### 4.
makeOneExhaustiveIM <- function(rm,k){
im.raw <- t(combn(nrow(rm),k))
fm <- getFamMat(rm)
im <- extractValidIM(rm,fm,im.raw)
return(im)
}
### 5.
getPSforK <- function(rm.ordered,k,max.nrs.ee,max.compute){
#if(missing(max.compute)) max.compute <- 10^6
#if(missing(max.nrs.ee)) max.nrs.ee <- 10^5
this.max.compute <- min(max.compute,20*max.nrs.ee)
if(k <= 6) this.max.compute <- min(max.compute,5*max.nrs.ee)
beg <- Sys.time()
max.n <- getMaxNforK(k,this.max.compute)
max.n <- min(max.n,nrow(rm.ordered))
rm <- rm.ordered[1:max.n,]
im <- makeOneExhaustiveIM(rm,k)
if(nrow(im)==0){
pool.size <- 0
nrspk <- 0
}
if(nrow(im)>0){
all.maxs <- apply(im,1,max)
num.rs.below <- rep(NA,length=max.n)
for(j in 1:max.n) num.rs.below[j] <- length(which(all.maxs <= j))
pool.size <- max(which(num.rs.below <= max.nrs.ee))
im <- im[which(all.maxs<= pool.size),]
nrspk <- nrow(im)
}
#print(difftime(Sys.time(),beg,units = "min"))
#print(c(pool.size,nrspk))
return(pool.size)
}
# 6.
#' @title Get pool sizes for phase 2
#'
#' @param rm.ordered binary rule matrix ordered from phase 1
#' @param k.max maximum rule set size
#' @param max.nrs.ee max number of rule sets per k
#' @param max.compute maximum raw rule sets considered per k
#' @examples
#' library(crso)
#' data(skcm)
#' list2env(skcm.list,envir=globalenv())
#' rm.full <- buildRuleLibrary(D,rule.thresh = 0.05) # Rule library matrix, dimension: 60 x 71
#' rm.ordered <- rm.full # Skip phase one in this example
#' getPoolSizes(rm.ordered,k.max = 7,max.nrs.ee = 10000)
#' # [1] 60 60 40 23 18 16 15
#' @export
getPoolSizes <- function(rm.ordered,k.max,max.nrs.ee,max.compute){
if(missing(k.max)) k.max <- 12
if(missing(max.nrs.ee)) max.nrs.ee <- 10^5
if(missing(max.compute)) max.compute <- 10*max.nrs.ee
#beg <- Sys.time()
pool.sizes <- rep(NA,length=k.max)
names(pool.sizes) <- paste0("K.",1:k.max)
pool.sizes[1] <- nrow(rm.ordered)
for(k in 2:k.max) pool.sizes[k] <- getPSforK(rm.ordered,k,max.nrs.ee,max.compute)
#print(paste0("Time to make pool sizes: " ,signif(difftime(Sys.time(),beg,units="min"),4)," min"))
return(pool.sizes)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.