R/eStep1.R
In forsearch: Diagnostic Analysis Using Forward Search Procedure for Various Models

eStep1 <-
function (yf, df1=NULL, df1.ls=NULL, df1fact.list, inner.rank=NULL, initial.sample, formulaE=NULL, nofactform, 
                   start, nofactstart, inner.r.vector=NULL, contR=FALSE, algo, ycol, b.d) 
{
   #                                    eStep1   
     # 
     # VALUE      Produces rim for Step 1. Uses candprep function to form matrix of candidate sets of observation numbers
     #            and runs nls and predictor to determine set with median sum of squared errors. 
     #
     # INPUT      yf             yesfactor
     #            df1            Data frame being analyzed by forward search. 
     #            df1.ls         2-lovel list of df1 by sector and factor subset
     #            df1fact.list   1-level list by factor subset
     #            inner.rank     Number of observations to pull from entire database
     #            initial.sample Number of random samples from which to take rim
     #            formulaE       Formula for all effects including factors and constructed variables    
     #            nofactform     Formula for all effects, omitting factors
     #            start
     #            nofactstart    Vector of starting values, omitting factors
     #            inner.r.vector Vector of number to draw within each factor
     #            contR          Logical
     #            algo           nls algorithm
     #            ycol           Response column number
     #            b.d            begin.diagnose Ranges from 25 to 45
     #
     spacehere <- "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      eStep1           "    
# print("in eStep1") 

                           if(b.d <=20 ){ print("",quote=FALSE);print(paste(spacehere,"Section 20",sep=" "),quote=FALSE);
                                 Hmisc::prn(yf);Hmisc::prn(utils::head(df1));Hmisc::prn(df1.ls);Hmisc::prn(df1fact.list);Hmisc::prn(inner.rank);
                                 Hmisc::prn(initial.sample);Hmisc::prn(dim(df1));Hmisc::prn(formulaE);
                                 Hmisc::prn(start);Hmisc::prn(inner.r.vector);Hmisc::prn(ycol);
                                 print("end of initial b.d in eStep1")   }
#stop("initial eStep1")
     ###################################
     # Set up final holding variables  #
     ###################################
     nobs <- dim(df1)[1]

     usection <- unique(df1$Section)     # will pull a number of obs to cover the sections completely 
     nusection <- length(usection)

     pullN <- inner.rank     # default. +1 to avoid nls crash
     if(yf){
          pullN <- max(inner.r.vector)            # candprep will pull this many for each factor/section subset
     }
     hold.cands <- candprep(yf=yf, dfa2=df1, fixd.ls=df1.ls, inner.rank=inner.r.vector, preprnk=pullN, 
                   in.sam=initial.sample, makearray=TRUE, b.d=b.d)                                       # candprep as 2-level list

     nfactorlevels <- length(hold.cands)
     SSE.outer <- rep(-99, initial.sample)
     SSE.inner <- matrix(-99, nrow=initial.sample, ncol=nfactorlevels)

     if(yf){
          ###############################################
          # Collapse all the sections of each subfactor #
          ###############################################
          hold.cands.sub <- vector("list", nfactorlevels)
          for(i in 1:nfactorlevels){
               tempmatrix <- NULL
               for(j in 1:nusection){
                    index <- 1:inner.r.vector[i]
                    tempmatrix <- cbind(tempmatrix, hold.cands[[i]][[j]][,index]    )
                    hold.cands.sub[[i]] <- tempmatrix
               }    #   j
          }    #    i
          #########################################################################
          # Calculate the SSE for each sample subset WITHIN its own factor subset #
          #########################################################################
          for(j in 1:nfactorlevels){
               hold.cands.c <- hold.cands.sub[[j]]   # candidate matrix within 1 factor subset
               markerhold <- df1[hold.cands.c[1,],][1,]
               markerhold <- markerhold$holdISG
               universe <- df1[df1$holdISG==markerhold,]
               ##############################################
               # If nls controls have been weakened and nls #
               # still doesn't converge, give notice and    #
               # ignore those candidates.                   #
               ##############################################
               for(r in 1:initial.sample){
                    xmat <- hold.cands.c[r,]       # 1 row out of initial.sample
                    #############################################
                    # Add a wild card to each candidate set     #
                    # if the candidate set is too small         #      SHOULD WE REALLY BE DOING THIS?
                    # Seems to need 1 more than starting number #
                    #############################################
                    if(length(xmat) <= length(nofactstart)){
                         uniObs <- universe$Observation
                         locat <- pmatch(xmat,uniObs)
                         wild <- uniObs[-locat]
                         wild <- sample(wild,1)
                         xmat <- c(wild, xmat)
                    }
                    smalldata <- df1[xmat,]
                    MED <- floor(dim(universe)[1]/2 + .00001)
                    MED[MED==0] <- 1
                    if(contR){
                         a.warn.occurred <- FALSE
                         tryCatch(
                           expr=(lmsmall <- stats::nls(formula=nofactform, data=smalldata, start=nofactstart, 
                           algorithm = algo, control=list(maxiter=1000, tol=.001, warnOnly=FALSE)))
                           , warning=function(w) {a.warn.occurred <- TRUE}
                         )                                                                                     #  nls
                         if(a.warn.occurred){                            
                              messprop <- "nls failed to converge"
                              print(messprop, quote=FALSE)
                              SSE.inner[r,j] <- -99
                         }
                         else{
                              predsmall <- stats::predict(lmsmall, newdata=universe)                            # predict
                              errorsmall <- df1[, ycol] - predsmall
                              sserrorsmall <- sort(errorsmall^2)  
                              SSE.inner[r,j] <- sserrorsmall[MED]
                         }
                    }    #  contR TRUE
                    else{
                         lmsmall <- stats::nls(formula=nofactform, data=smalldata, start=start, algorithm=algo)                                                 # nls
                         predsmall <  - stats::predict(lmsmall, newdata=universe)                                # predict
 
                         errorsmall <- df1[, ycol] - predsmall
                         sserrorsmall <- sort(errorsmall^2)  
                         SSE.inner[r,j] <- sserrorsmall[MED]
                    }# contR FALSE
               }     #   r
          }          #   j
          sumup <- SSE.inner == -99
          sumsumup <- sum(sumup)
          propsum <- 100 * sumsumup/(initial.sample * nfactorlevels)
          if(propsum > 0){
               print(" ", quote=FALSE)
               print("Testing squared errors within factors,", quote=FALSE)
               summess <- paste(sumsumup, "(", propsum, "%)", "candidate Step 1 observation sets failed to converge", sep=" ")
               print(summess, quote = FALSE)
               print(" ", quote = FALSE)
               maxSSEinner <- max(SSE.inner)
               index <- SSE.inner < 0               
               SSE.inner[index] <- maxSSEinner + 10
          }        # propsum > 0
          else{
               print(" ", quote=FALSE)
               print("Testing squared errors within factors, nls calculations for all candidate sets converged.", quote=FALSE)
               print(" ", quote=FALSE)
          }
     }     #    end of yf TRUE    EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE
     #
     ###################################################
     # Compute outer SSE for both yf TRUE and yf FALSE #
     ###################################################
     hold.cands.b <- NULL
     for(jj in 1:nfactorlevels){
          for(kk in 1:nusection){
               uu <- hold.cands[[jj]][[kk]]
               dimuu2 <- dim(uu)[2]
               uu <- uu[,-dimuu2]
               hold.cands.b <- cbind(hold.cands.b, uu)
          }   #  kk
     }        #  jj  
     #
     ##############################################
     # Calculate the outer median squared error   #
     # If nls controls have been weakened and nls #
     # still doesn't converge, give notice and    #
     # ignore those candidates.                   #
     ##############################################
     universe <- df1
     MED <- floor(dim(universe)[1]/2 + .00001)
     MED[MED==0] <- 1
     for(r in 1:initial.sample){
          xmat <- hold.cands.b[r,]
          smalldata <- df1[xmat,]
          if(contR){
               a.warn.occurred <- FALSE
               an.error.occurred <- FALSE
               tryCatch(
                    expr=(lmsmall <- stats::nls(formula=formulaE, data=smalldata, start=start, 
                    algorithm = algo, control=list(maxiter=1000, tol=.001, warnOnly=FALSE)))
                    , warning=function(w) {a.warn.occurred <- TRUE}
                    , error=function(e) {an.error.occurred <- TRUE}
               )                                                                              #  nls
               if(a.warn.occurred | an.error.occurred){                            
                    messprop <- "nls failed to converge"
                    print(messprop, quote=FALSE)
                    SSE.outer[r] <- -999
                    if(a.warn.occurred){print("It was a warning") }
                    if(an.error.occurred) {print("It was an error")}
                    stop("failed")
               }
               else{
                    predsmall <- stats::predict(lmsmall, newdata=universe)                        # predict
                    errorsmall <- df1[, ycol] - predsmall
                    sserrorsmall <- sort(errorsmall^2)  
                    SSE.outer[r] <- sserrorsmall[MED]
              }
          }      #   contR
          else{
               lmsmall <- stats::nls(formula=formulaE, data=smalldata, start=start, algorithm=algo)  # nls
               predsmall <- stats::predict(lmsmall, newdata=universe)                                # predict

               errorsmall <- df1[, ycol] - predsmall
               sserrorsmall <- sort(errorsmall^2)  
               SSE.outer[r] <- sserrorsmall[MED]
          }    # not contR
     }     # r
     ##########################################################################################
     # Combine into a single SSE.inner and penalize any candidate set that failed to converge #
     ##########################################################################################
     SSE.inner <- apply(SSE.inner,1,mean)
     SSE.inner[SSE.inner < 0]<- max(SSE.inner) + 10
     #
     sumup <- SSE.outer == -99
     sumsumup <- sum(sumup)
     propsum <- 100 * sumsumup/initial.sample
     if(propsum > 0){
          print("Testing outer squared errors,", quote=FALSE)
          summess <- paste(sumsumup, "(", propsum, "%)", "candidate Step 1 observation sets failed to converge", sep=" ")
          print(summess, quote = FALSE)
          print(" ", quote = FALSE)
          maxSSEouter <- max(SSE.outer)
          index <- SSE.outer < 0               
          SSE.outer[index] <- maxSSEouter + 10
     }   # propsum > 0
     else{
          print("Testing outer squared errors, nls calculations converged for all candidate sets.", quote=FALSE)
          print(" ", quote=FALSE)
     }
          #
          #################################################################
          # Penalize any candidate set whose SSE.outer failed to converge #
          # Combine SSE.inner to SSE.outer if there are factors           #
          #################################################################
          SSE.outer[SSE.outer < 0] <- max(SSE.outer) + 10
          if(yf){
               SSE <- (SSE.inner + SSE.outer)/2
          }
          else{
               SSE <- SSE.outer
          }
          ##############################################################
          # Attach SSE to hold.cands.c and sort to identify lowest MED #
          ##############################################################
          rimoutx <- cbind(SSE, hold.cands.b)
          rimoutx <- rimoutx[order(rimoutx[,1]),]
          rimoutx <- rimoutx[,-1]
          rimoutx <- rimoutx[1,]
# print("leaving eStep1")

     return(sort(rimoutx))
}
Any scripts or data that you put into this service are public.
forsearch documentation built on April 4, 2025, 5:52 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
forsearch
Diagnostic Analysis Using Forward Search Procedure for Various Models

R/eStep1.R
In forsearch: Diagnostic Analysis Using Forward Search Procedure for Various Models

Try the forsearch package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

forsearch Diagnostic Analysis Using Forward Search Procedure for Various Models

R/eStep1.R In forsearch: Diagnostic Analysis Using Forward Search Procedure for Various Models

Try the forsearch package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

forsearch
Diagnostic Analysis Using Forward Search Procedure for Various Models

R/eStep1.R
In forsearch: Diagnostic Analysis Using Forward Search Procedure for Various Models