Nothing
compute_initial_weights = function(y,x){
# The function returns initial weights.
initial_weights = as.numeric(cor(y,x))^2
initial_weights = initial_weights/(sum(initial_weights))
return(initial_weights)
}
compute_scores = function(y,x,m,B,initial_weights=NULL){
# The function returns RSM scores.
p = ncol(x)
scores = numeric(p)
ns = numeric(p)
for(k in 1:B){
submodel = sample(1:p,size=m,replace=FALSE,prob=initial_weights)
lm1 = lm(y~x[,submodel])
weights = as.numeric((summary(lm1)$coef[-1,3])^2)
submodel = submodel[which(!is.na(coef(lm1)[-1]))] #UPDATE: deal with collinearity
scores[submodel] = scores[submodel] + weights
ns[submodel] = ns[submodel] + 1
}
ns = ifelse(ns!=0,ns,1)
scores = scores/ns
return(scores)
}
compute_scores_parallel = function(y,x,m,B,initial_weights,nslaves){
#This function returns RSM scores computed using parallel function foreach %dopar%.
p = ncol(x)
requireNamespace("foreach",quietly = TRUE)
taskCount=floor(B/nslaves)
r1 = foreach(i = 1:(nslaves-1), .combine=cbind, .export=c("compute_scores_partial")) %dopar%{
scores_ns = compute_scores_partial(y,x,m,taskCount,initial_weights)
scores_ns
}
taskCount = B-taskCount*(nslaves-1)
r2 = compute_scores_partial(y,x,m,taskCount,initial_weights)
r3 = cbind(r1,r2)
scores = apply(r3[1:p,],1,sum)
ns = apply(r3[-(1:p),],1,sum)
ns = ifelse(ns!=0,ns,1)
scores = scores/ns
return(scores)
}
compute_scores_partial = function(y,x,m,taskCount,initial_weights){
# The function returns RSM scores computed based on taskCount repetitions. Unlike the compute_scores this function returns concatenated vector of scores and ns.
p = ncol(x)
scores = numeric(p)
ns = numeric(p)
for(k in 1:taskCount){
submodel = sample(1:p,size=m,replace=FALSE,prob=initial_weights)
lm1 = lm(y~x[,submodel])
weights = as.numeric((summary(lm1)$coef[-1,3])^2)
submodel = submodel[which(!is.na(coef(lm1)[-1]))] #UPDATE: deal with collinearity
scores[submodel] = scores[submodel] + weights
ns[submodel] = ns[submodel] + 1
}
return(c(scores,ns))
}
is.wholenumber = function(x, tol = .Machine$double.eps^0.5){
# The function checks if the given number is whole number.
abs(x - round(x)) < tol
}
select_finalmodel_bic = function(y,x,order1,thrs,penalty){
# The function returns final model using Bayesian Information Criterion.
n = length(y)
lm0 = lm(y~1)
beta0 = as.numeric(coef(lm0))
rss0 = sum(lm0$residuals^2)
bic0 = n*log(rss0/n)+1*penalty
xo = x[,order1[1:thrs]]
xo = cbind(1,xo)
xo = as.matrix(xo)
# qr1 = qr(xo)
# Q = qr.Q(qr1)
# R = qr.R(qr1)
# R_inv = solve(R)
# tQy = t(Q) %*% y
qr1 = qr(xo)
Q = qr.Q(qr1)
R = qr.R(qr1)
small_diag = which(abs(diag(R))<1e-7)
if(length(small_diag)>0){
#Deal with collinearities, coefficients corresponding to linearly dependent columns will be 0.
del = qr1$pivot[small_diag]
Q1 = Q[,-small_diag]
R1= R[-small_diag,-small_diag]
R1_inv = solve(R1)
tQy1 = t(Q1) %*% y
R_inv = matrix(0,ncol(xo),ncol(xo))
tQy = numeric(ncol(xo))
R_inv[-del,-del]=R1_inv
tQy[-del] = tQy1
warning("Multicollinearity in data. Only coeffients corresponding to linearly independent columns are estimated.")
}else{
R_inv = solve(R)
tQy = t(Q) %*% y
}
betabj = vector("list",thrs)
rssj = numeric(thrs)
bic = numeric(thrs)
RSthrs = y-Q %*% tQy
rssj[thrs] = t(RSthrs) %*% RSthrs
for(j in thrs:2){
rssj[j-1] = rssj[j]+ tQy[(j+1)]^2
bic[j] = n*log(rssj[j]/n)+(j+1)*penalty
}
bic[1] = n*log(rssj[1]/n)+2*penalty
sel = which.min(bic)
betab = as.numeric(R_inv[1:(sel+1),1:(sel+1)] %*% tQy[1:(sel+1)])
model_sel = order1[1:sel]
if(bic0<bic[sel]){
betab = beta0
model_sel = 0
}
Result = list(model=model_sel,informationCriterion=bic,coefficients=betab)
return(Result)
}
select_finalmodel_qr = function(y,x,yval,xval,order1){
# The function returns final model corresponding to minimal prediction error on validation set.
n = length(y)
p = ncol(x)
x = x[,order1]
xval = xval[,order1]
p1 = ifelse(p<=(n-1),p,n-1)
x = x[,1:p1]
xval = xval[,1:p1]
x=cbind(1,x)
xval=cbind(1,xval)
# qr1 = qr(x)
# Q = qr.Q(qr1)
# R = qr.R(qr1)
# R_inv = solve(R)
# tQy = t(Q) %*% y
qr1 = qr(x)
Q = qr.Q(qr1)
R = qr.R(qr1)
small_diag = which(abs(diag(R))<1e-7)
if(length(small_diag)>0){
#Deal with collinearities, coefficients corresponding to linearly dependent columns will be 0.
del = qr1$pivot[small_diag]
Q1 = Q[,-small_diag]
R1= R[-small_diag,-small_diag]
R1_inv = solve(R1)
tQy1 = t(Q1) %*% y
R_inv = matrix(0,ncol(x),ncol(x))
tQy = numeric(ncol(x))
R_inv[-del,-del]=R1_inv
tQy[-del] = tQy1
warning("Multicollinearity in data. Only coefficients corresponding to linearly independent columns are estimated.")
}else{
R_inv = solve(R)
tQy = t(Q) %*% y
}
betab = vector("list",p1)
betab[[p1]] = R_inv %*% tQy
pred_err = numeric(p1)
xval_R_inv = xval %*% R_inv
pred = xval_R_inv %*% tQy
pred_err[p1] = mean( (yval-pred)^2 )
for(j in p1:2){
betab[[j-1]] = betab[[j]] - as.numeric(tQy[j+1] * R_inv[,(j+1)])
pred = pred - tQy[j+1] * xval_R_inv[,(j+1)]
pred_err[j-1] = mean( (yval - pred)^2 )
}
jsel=which.min(pred_err)
model=order1[1:jsel]
Result = list(model=model,predError=pred_err,coefficients=betab[[jsel]][1:(jsel+1)])
return(Result)
}
new.regRSM <- function()
{
regRSM=list(scores=NULL,model=NULL,time=list(user=0,system=0,elapsed=0),
data_transfer=list(user=0,system=0,elapsed=0),
coefficients=NULL, predError=NULL,input_data=list(x=NULL,y=NULL),
control=list(useGIC=NULL,selval=NULL,screening=NULL,init_weights=FALSE,parallel=NULL,m=NULL,B=NULL))
attr(regRSM,"class")="regRSM"
return(regRSM)
}
#regRSM = function(x,y,yval,xval,m,B, parallel,nslaves,store_data,screening,init_weights,useGIC,thrs,penalty) UseMethod("regRSM")
regRSM = function(x,...) UseMethod("regRSM")
regRSM.default = function(x,y,yval=NULL,xval=NULL,m=NULL,B=NULL, parallel="NO",nslaves=c(4),
store_data=FALSE,screening=NULL,init_weights=FALSE,useGIC=TRUE,thrs=NULL,penalty=NULL,...)
{
data_x = x;
x = as.matrix(x)
y = as.numeric(y)
n = length(y)
p = ncol(x)
scores = NULL
startTime <- proc.time()
#Check for missing values:
complete_cases1 = complete.cases(x)
which_row_na = which(!complete_cases1)
if(length(which_row_na)!=0){
stop("Missing values in rows: ",toString(which_row_na))
}
# Set default values of m and B
if(is.null(m)){
m = floor(min(n-1,p)/2)
}else{
if(m>(n-2)) stop("Parameter m cannot be larger than the number of observations minus two!")
if(m<=0) stop("Parameter m must be a positive number!")
if(!is.wholenumber(m)) stop("Parameter m must be a whole number!")
}
if(is.null(B)){
B = 1000
}else{
if(B<=0) stop("Parameter B must be a positive number!")
if(!is.wholenumber(B)) stop("Parameter B must be a whole number!")
}
#Check for screeneing
if(!is.null(screening))
{
if((screening>=1)||(screening<=0)) stop("screening must be in (0,1)")
iw = compute_initial_weights(y,x)
sel = which(iw>=quantile(iw,screening))
if(m>length(sel)) stop('Parameter m cannot be larger than the number of attributes remaining after screening procedure!')
x = x[,sel]
}
#Check for initial_weights
if(init_weights){
initial_weights = compute_initial_weights(y,x)
}else{
initial_weights = NULL
}
#RSM method esence
if(parallel=="MPI"){
if("Rmpi" %in% rownames(installed.packages()) == FALSE){
stop("Please install package 'Rmpi' to use MPI parallel version of regRSM")
}
requireNamespace("Rmpi",quietly = TRUE)
create_slaves(nslaves)
d1=proc.time()
send_data(y,x,m,initial_weights)
d2=proc.time()
p = ncol(x)
n = nrow(x)
scores=make_experimentStatic(n,p,B,m,initial_weights)
}else{
if(parallel=="POSIX"){
if("doParallel" %in% rownames(installed.packages()) == FALSE){
stop("Please install package 'doParallel' to use POSIX parallel version of regRSM")
}
requireNamespace("doParallel",quietly = TRUE)
requireNamespace("parallel",quietly = TRUE)
d1=proc.time()
cl = makeCluster(nslaves)
registerDoParallel(cl)
#registerDoParallel(nslaves)
d2=proc.time()
scores = compute_scores_parallel(y,x,m,B,initial_weights,nslaves)
stopCluster(cl)
#stopImplicitCluster()
}else{
d1=d2=proc.time()
scores = compute_scores(y,x,m,B,initial_weights)
}
}
#Set score 0, when variable is not selected by screeneing
if(!is.null(screening)){
scores1 = numeric(ncol(data_x))
scores1[sel] = scores
scores = scores1
}
selval = ifelse(!is.null(yval) && !is.null(xval),TRUE,FALSE)
if(selval) useGIC = FALSE
if(useGIC){
if(is.null(penalty)){
penalty = log(length(y))
}else{
if(penalty<0) stop("Penalty must be positive!")
}
if(is.null(thrs)){
thrs = ifelse(p<=floor(n/2),p,floor(n/2))
}else{
if(thrs>min(p,(n-2))) stop("Parameter thrs cannot be larger than min(p,(n-2))!")
if(thrs<=1) stop("Parameter thrs must be greater than one!")
if(!is.wholenumber(thrs)) stop("Parameter thrs must be a whole number!")
}
order1 = sort(scores,decreasing=TRUE,index.return=TRUE)$ix
selected_model = select_finalmodel_bic(y,data_x,order1,thrs,penalty)
model = selected_model$model
coefficients = as.numeric(selected_model$coefficients)
predError = NULL
informationCriterion = selected_model$informationCriterion
}else{
if(selval==TRUE){
order1 = sort(scores,decreasing=TRUE,index.return=TRUE)$ix
selected_model = select_finalmodel_qr(y,data_x,yval,xval,order1)
model = selected_model$model
coefficients = as.numeric(selected_model$coefficients)
predError = selected_model$predError
informationCriterion = NULL
}else{
model = NULL
coefficients = NULL
predError = NULL
informationCriterion = NULL
}
}
stopTime <- proc.time()
regRSM = new.regRSM()
regRSM$scores = scores
regRSM$model = model
regRSM$time = stopTime-startTime
regRSM$coefficients = coefficients
regRSM$predError = predError
regRSM$informationCriterion = informationCriterion
regRSM$data_transfer = d2-d1
if(store_data) { regRSM$input_data$x=data_x; regRSM$input_data$y=y }
regRSM$control$useGIC = useGIC
regRSM$control$selval = selval
regRSM$control$screening = screening
regRSM$control$init_weights = init_weights
regRSM$control$parallel = parallel
regRSM$control$m = m
regRSM$control$B = B
print(regRSM)
return(regRSM)
}
regRSM.formula <- function(formula, data=NULL, ...)
{
mf = model.frame(formula,data)
x = model.matrix(attr(mf, "terms"), data=mf)
#Remove column corresponding to the intercept
x = as.matrix(x[,-1])
y = as.numeric(model.response(mf))
est = regRSM(x, y, ...)
cl = match.call()
est$call = cl
est$formula = formula
return(est)
}
predict.regRSM = function(object,xnew,...){
if(is.null(object$coefficients)) stop("The final model is not selected!")
ynew <- as.numeric(cbind(1,xnew[,object$model]) %*% coef(object))
return(ynew)
}
plot.regRSM = function(x,...){
object = x
if(!is.null(object$informationCriterion)){
p1 = length(object$informationCriterion)
plot(1:p1,object$informationCriterion,type="b",xlab="Variables",ylab="Generalized Information Criterion",main="Generalized Information Criterion",...)
}else{
if(!is.null(object$predError)){
p1 = length(object$predError)
plot(1:p1,object$predError,type="b",xlab="Variables",ylab="Prediction Error",main="Prediction Error on Validation Set",...)
}else{
cat("The final model is not selected! \n")
}
}
}
ImpPlot = function(object) UseMethod("ImpPlot")
ImpPlot.default <- function(object)
stop("No method implemented for this class of object")
ImpPlot.regRSM = function(object){
#The function produces dot plot showing variable importances of the variables.
if (!inherits(object, "regRSM"))
stop("This function only works for objects of class `regRSM'")
dotchart(object$scores,main="Variable Importance measure",xlab="RSM scores",ylab="Variable number")
}
validate = function(object,yval,xval) UseMethod("validate")
validate.default <- function(object)
stop("No method implemented for this class of object")
validate.regRSM = function(object,yval,xval){
#The function selects the new final model based on the final scores form the original fit.
if(is.null(object$input_data$x) || is.null(object$input_data$y)) stop('Argument store_data must be TRUE!')
order1 = sort(object$scores,decreasing=TRUE,index.return=TRUE)$ix
selected_model = select_finalmodel_qr(object$input_data$y,object$input_data$x,yval,xval,order1)
model = selected_model$model
predError = selected_model$predError
coefficients = as.numeric(selected_model$coefficients)
object$model=model
object$coefficients=coefficients
object$predError=predError
return(object)
}
print.regRSM = function(x,...){
#The function prints information about the model.
object = x
cat("\n Model summary: \n\n")
if(object$control$useGIC){
cat("Selection method: Generalized Information Criterion \n")
}else{
if(object$control$selval==TRUE){
cat("Selection method: Validation set \n")
}else{
cat("Selection method: Final model is not selected! \n")
}
}
if(!is.null(object$control$screening)){
cat("Screening: yes \n")
}else{
cat("Screening: no \n")
}
if(object$control$init_weights){
cat("Initial weights: yes \n")
}else{
cat("Initial weights: no \n")
}
if(object$control$parallel=="MPI" | object$control$parallel=="POSIX"){
cat("Version: parallel \n")
}else{
cat("Version: sequential \n")
}
cat("Subspace size:", object$control$m, "\n")
cat("Number of simulations:", object$control$B, "\n")
if(object$control$parallel=="MPI" ){
cat("Remember to use: 'mpi.close.Rslaves()' before terminate the current R session. \n")
}
}
summary.regRSM = function(object,...){
#The function prints information about the model.
cat("\n Model summary: \n\n")
if(object$control$useGIC){
cat("Selection method: Generalized Information Criterion \n")
}else{
if(object$control$selval==TRUE){
cat("Selection method: Validation set \n")
}else{
cat("Selection method: Final model is not selected! \n")
}
}
if(!is.null(object$control$screening)){
cat("Screening: yes \n")
}else{
cat("Screening: no \n")
}
if(object$control$init_weights){
cat("Initial weights: yes \n")
}else{
cat("Initial weights: no \n")
}
if(object$control$parallel=="MPI" | object$control$parallel=="POSIX"){
cat("Version: parallel \n")
}else{
cat("Version: sequential \n")
}
cat("Subspace size:", object$control$m, "\n")
cat("Number of simulations:", object$control$B, "\n")
if(object$control$parallel=="MPI" ){
cat("Remember to use: 'mpi.close.Rslaves()' before terminate the current R session. \n")
}
}
roc = function(object,truemodel,plotit,...) UseMethod("roc")
roc.default <- function(object)
stop("No method implemented for this class of object")
roc.regRSM = function(object,truemodel,plotit=TRUE,...){
# The function produces ROC curve for ordering and computes AUC.
ordering = sort(object$scores,decreasing=TRUE,index.return=TRUE)$ix
if(length(setdiff(truemodel,ordering))>=1){
stop("Argument truemodel must be a subset of ordering!")
}
p = length(ordering)
npos = length(truemodel)
nneg = p - npos
tpr = numeric(p+1)
fpr = numeric(p+1)
tpr[1] = 0
fpr[1] = 0
for(j in 2:(p+1)){
if(ordering[j-1] %in% truemodel){
tpr[j] = tpr[j-1] + 1
fpr[j] = fpr[j-1]
}else{
tpr[j] = tpr[j-1]
fpr[j] = fpr[j-1] + 1
}
}
tpr = tpr/npos
fpr = fpr/nneg
counts = as.numeric(summary(as.factor(tpr[tpr!=0])))
xs = (counts-1)*(1/nneg)
values = unique(tpr[tpr!=0])
auc = sum( xs * values )
if(plotit){
plot(fpr,tpr,type="b",main="ROC curve for RSM ordering")
legend("bottomright",paste("AUC= ",round(auc,4)))
}else{
return(auc)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.