R/cv_GIC_indexed.R
In DMRnet: Delete or Merge Regressors Algorithms for Linear and Logistic Model Selection and High-Dimensional Data

cv_GIC_indexed <- function(X, y, nfolds, model_function, ...) {

        family = list(...)$family

        if (family == "gaussian"){
                n <- length(y)
                real_n <- 0 #recount  of test instances
                foldid <- sample(rep(1:nfolds,length.out=n))   #PP replaces cvfolds by a simpler sample(rep()) function
                err <- list(); rss <- list(); #md <- list()

                model.full <- model_function(X, y, ...)
                lambda.full<- model.full$lambda

                for (fold in 1:nfolds){

                        Xte <- X[foldid == fold, ,drop = FALSE]
                        yte <- y[foldid == fold, drop = FALSE]
                        Xtr <- X[foldid != fold, ,drop = FALSE]
                        ytr <- y[foldid != fold, drop = FALSE]

                        compute_model <- cv_compute_model(model_function, Xtr, ytr, Xte, yte, real_n, lambda.full = lambda.full, ...)   #three letter abbreviations (lambda.full vs lam) make this function call confused, so explicit passing of named parameter i.e. lambda.full=lambda.full is required
                        model<-compute_model$model
                        Xtr<-compute_model$Xtr
                        ytr<-compute_model$ytr
                        Xte<-compute_model$Xte
                        yte<-compute_model$yte
                        real_n<-compute_model$real_n

                        #PP new code
                        rss[[fold]] <- model$rss

                        pred <- predict.DMR(model, newx = as.data.frame(Xte))
                        #PP new code error[[fold]] <- apply(pred, 2, function(z) sum((z - yte)^2))
                        err[[fold]] <- apply(pred, 2, function(z) mean((z - yte)^2))

                }


                len_err <- sapply(err, length)
                foldmin <- min(len_err)
                ERR <- sapply(1:nfolds, function(i) err[[i]][ (len_err[i] - foldmin + 1) : len_err[i] ] )
                if (foldmin == 1) {
                  ERR<-t(as.matrix((ERR)))  #making it a horizontal one-row matrix
                }
                #err <- rowMeans(ERR); kt <- which(err == min(err)); df.min <- dmr$df[kt[length(kt)]]; plot(err, type="o")

                p1 <- model.full$df[1]
                s2 <- model.full$rss[1]/(n-p1)

                p <- ncol(model.full$beta)
                if (is.null(p))
                  p <- length(model.full$beta)

                RIC_constant <- constants()$RIC_gaussian_constant
                Const <- exp(seq(log(RIC_constant/50),log(RIC_constant*50), length=81))
                laGIC <- Const*log(p)*s2
                RSS <- sapply(1:nfolds, function(i) rss[[i]][ (len_err[i] - foldmin + 1) : len_err[i] ] )
                if (is.null(dim(RSS))) {
                  RSS<-t(as.matrix((RSS)))  #making it a horizontal one-row matrix
                }
                #MD <- sapply(1:nfolds, function(i)  md[[i]][ (len_err[i] - foldmin + 1) : len_err[i] ] )
                IND <- apply( RSS, 2, function(r) sapply( laGIC, function(la) which.min(r+la*length(r):1) ) )
                errGIC <- apply( IND, 1, function(ind) mean(ERR[cbind(ind,1:nfolds)]) )
                #mdGIC  <- apply( IND, 1, function(ind) mean(MD[cbind(ind,1:10)]) )
                #plot(mdGIC[length(laGIC):1],errGIC[length(laGIC):1]/s2, xlab="MD", ylab="PE", type="o")

                r <- model.full$rss
                kt <- which(errGIC == min(errGIC))
                indGIC <- kt[length(kt)]    #TODO: why last?
                gic.full <- (r+laGIC[indGIC]*length(r):1)/(real_n*s2)
                #plot(gic.full[length(gic.full):1])

        } else{
                if (family == "binomial"){
                        if (!inherits(y, "factor")){
                                stop("Error: y should be a factor")
                        }
                        lev <- levels(factor(y))
                        if (length(lev) != 2){
                                stop("Error: factor y should have 2 levels")
                        }
                        n1 <- table(y)[1]
                        n2 <- table(y)[2]
                        real_n <- 0 #recount  of test instances

                        foldid1 <- sample(rep(1:nfolds,length.out=n1))  #PP replaces cvfolds by a simpler sample(rep()) function
                        foldid2 <- sample(rep(1:nfolds,length.out=n2))  #PP replaces cvfolds by a simpler sample(rep()) function
                        foldid <- c()
                        foldid[which(y == levels(factor(y))[1])] = foldid1
                        foldid[which(y == levels(factor(y))[2])] = foldid2
                        #PP new code error <- list()
                        err <- list(); loglik <- list(); #md <- list()

                        model.full <- model_function(X, y, ...)
                        lambda.full<- model.full$lambda

                        for (fold in 1:nfolds) {

                                Xte <- X[foldid == fold, , drop = FALSE]
                                yte <- y[foldid == fold, drop = FALSE]
                                Xtr <- X[foldid != fold, , drop = FALSE]
                                ytr <- y[foldid != fold, drop = FALSE]

                                compute_model <- cv_compute_model(model_function, Xtr, ytr, Xte, yte, real_n, lambda.full = lambda.full, ...)   #three letter abbreviations (lambda.full vs lam) make this function call confused, so explicit passing of named parameter i.e. lambda.full=lambda.full is required
                                model<-compute_model$model
                                Xtr<-compute_model$Xtr
                                ytr<-compute_model$ytr
                                Xte<-compute_model$Xte
                                yte<-compute_model$yte
                                real_n<-compute_model$real_n

                                #SzN new code based on PP new code
                                loglik[[fold]] <- -2*model$loglik

                                pred <- predict.DMR(model, newx = as.data.frame(Xte), type = "class")
                                #SzN new code based on PP new code error[[fold]] <- apply(pred, 2, function(z) sum(z != yte))
                                err[[fold]] <- apply(pred, 2, function(z) mean(z != yte))

                        }

                        len_err <- sapply(err, length)
                        foldmin <- min(len_err)
                        ERR <- sapply(1:nfolds, function(i) err[[i]][ (len_err[i] - foldmin + 1) : len_err[i] ] )
                        #err <- rowMeans(ERR); kt <- which(err == min(err)); df.min <- dmr$df[kt[length(kt)]]; plot(err, type="o")


                        if (foldmin == 1) {
                          ERR<-t(as.matrix((ERR)))  #making it a horizontal one-row matrix
                        }

                        p <- ncol(model.full$beta)
                        if (is.null(p))
                          p <- length(model.full$beta)

                        RIC_constant <- constants()$RIC_binomial_constant
                        Const <- exp(seq(log(RIC_constant/50),log(RIC_constant*50), length=81))
                        laGIC <- Const*log(p)
                        LOGLIK <- sapply(1:nfolds, function(i) loglik[[i]][ (len_err[i] - foldmin + 1) : len_err[i] ] )
                        if (is.null(dim(LOGLIK))) {
                          LOGLIK<-t(as.matrix((LOGLIK)))  #making it a horizontal one-row matrix
                        }
                        #MD <- sapply(1:nfolds, function(i)  md[[i]][ (len_err[i] - foldmin + 1) : len_err[i] ] )
                        IND <- apply( LOGLIK, 2, function(ll) sapply( laGIC, function(la) which.min(ll+la*length(ll):1) ) )
                        errGIC <- apply( IND, 1, function(ind) mean(ERR[cbind(ind,1:nfolds)]) )
                        #mdGIC  <- apply( IND, 1, function(ind) mean(MD[cbind(ind,1:10)]) )
                        #plot(mdGIC[length(laGIC):1],errGIC[length(laGIC):1]/s2, xlab="MD", ylab="PE", type="o")

                        ll <- -2*model.full$loglik
                        kt <- which(errGIC == min(errGIC))
                        indGIC <- kt[length(kt)]    #TODO: why last?
                        gic.full <- (ll+laGIC[indGIC]*length(ll):1)/real_n
                        #plot(gic.full[length(gic.full):1])

                }
                else{
                        stop("Error: wrong family, should be one of: gaussian, binomial")
                }
        }
        kt <- which(gic.full == min(stats::na.omit(gic.full))) #kt stores indexes in error equal to a minimum error.
        #if there is more than one such index, the LAST one is the one returned, because LAST means a smaller model.
        indMod <- kt[length(kt)]
        df.min <- model.full$df[indMod]

        kt <- which(gic.full <= min(stats::na.omit(gic.full)) + stats::sd(stats::na.omit(gic.full[gic.full!=Inf & gic.full!=-Inf])))
        if (length(kt) == 0) {
          df.1se <- NULL
        } else {
          indMod <- kt[length(kt)]
          df.1se <- model.full$df[indMod]
        }

        out <- list(df.min = df.min, df.1se = df.1se, dmr.fit = model.full, cvm = gic.full, foldid = foldid)
        return(out)
}

Any scripts or data that you put into this service are public.

DMRnet documentation built on Aug. 7, 2023, 5:11 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

DMRnet
Delete or Merge Regressors Algorithms for Linear and Logistic Model Selection and High-Dimensional Data

R/cv_GIC_indexed.R
In DMRnet: Delete or Merge Regressors Algorithms for Linear and Logistic Model Selection and High-Dimensional Data

Defines functions cv_GIC_indexed

Try the DMRnet package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DMRnet Delete or Merge Regressors Algorithms for Linear and Logistic Model Selection and High-Dimensional Data

R/cv_GIC_indexed.R In DMRnet: Delete or Merge Regressors Algorithms for Linear and Logistic Model Selection and High-Dimensional Data

Defines functions cv_GIC_indexed

Try the DMRnet package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DMRnet
Delete or Merge Regressors Algorithms for Linear and Logistic Model Selection and High-Dimensional Data

R/cv_GIC_indexed.R
In DMRnet: Delete or Merge Regressors Algorithms for Linear and Logistic Model Selection and High-Dimensional Data