R/aStep1.R
In forsearch: Diagnostic Analysis Using Forward Search Procedure for Various Models

aStep1 <-
function (yesfactor, df1, df1.ls, inner.rank, initial.sample, formulaA, nofactform, ycol, b.d) 
{
     #                                    aStep1   
     # 
     # VALUE      Produces rim for Step 1. Uses candprep function to form matrix of candidate sets of observation numbers
     #            and runs lm and predictor to determine set with median sum of squared errors.
     #
     # INPUT      yesfactor      Logical. TRUE if there are factors in the X matrix 
     #            df1            Data frame being analyzed by forward search. Observation column has no effect on lm analysis
     #            df1.ls         List of df1 by factor subset or NULL
     #            inner.rank     Vector. Number of observations to pull from each factor subset, or from overall database otherwise
     #            initial.sample Number of random samples from which to take rim
     #            formulaA       Formula for all effects including factors and constructed variables    
     #            nofactform     Formula for all effects, omitting factors
     #            ycol           Response column number
     #            b.d            begin.diagnose Ranges from 25 to 45
     #
     spacehere <- "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%      aStep1           "    
# print("in aStep1") 

                           if(b.d <=31 ){ print("",quote=FALSE);print(paste(spacehere,"Section 31",sep=" "),quote=FALSE);
                                 Hmisc::prn(yesfactor);Hmisc::prn(utils::head(df1));Hmisc::prn(df1.ls);Hmisc::prn(inner.rank);
                                 Hmisc::prn(initial.sample);Hmisc::prn(dim(df1));Hmisc::prn(formulaA);Hmisc::prn(ycol)   }

     ############################################################################################################### 
     # Collect sufficient observation numbers from all factor subsets or the entire database if no factors present #
     # Assemble them into a matrix.  The number of rows is initial.sample and the number of columns depends on the # 
     # number of observations.                                                                            #
     # Then run lm() on each row and predict to the entire database. Determine the median of the squared errors of # 
     # each prediction. Select the row that produced the minimum of these mediums.                                 #
     # The formula for the prediction is complete, including constructed covariates and factors.                   #
     ############################################################################################################### 
     # Set up final holding variables
     nobs <- dim(df1)[1]
     Observation <- 1:initial.sample
     SSE <- rep(-99, initial.sample)
     pullN <- max(inner.rank)

                                if(b.d <= 31){print("", quote = FALSE);print(paste(spacehere,"Section 31",sep=" "),quote=FALSE);
                                      Hmisc::prn(inner.rank);Hmisc::prn(pullN)       }

     if(yesfactor){
          nfactsub <- length(inner.rank)
          hold.cands <- candprep(yf=yesfactor, dfa2=df1, fixd.ls=df1.ls, inner.rank=inner.rank, preprnk=pullN, 
                        in.sam=initial.sample, b.d=b.d)                                                       # candprep list
     }        #   yesfactor TRUE
     else{
          hold.cands <- candprep(yf=yesfactor, dfa2=df1, fixd.ls=NULL, inner.rank=inner.rank, preprnk=pullN, 
                       in.sam=initial.sample, b.d=b.d)                                                        # candprep matrix
          hold.cands <- hold.cands[[1]]
     }    # no factors present
     #
     ################################################################################################### 
     # Run the lm function on each row of hold.cands and predict to the entire database for each one   #
     # Identify the median of the squared errors of each predictio.                                    #
     # Calculate the sum of squares of the error for each one amd store it in SSE.                     #
     # Sort this matrix on the 2nd column and locate the median row.  Save this value. Sort the matrix #
     # on this value. The observations on the first row (lowest median SSE are the rim for Step 1      #                                                                  #
     ################################################################################################### 
     if(yesfactor){
          SSErim <- NULL
          rimout.ls <- vector("list", nfactsub)
          this.form <- nofactform
          for(j in 1:nfactsub){
               hold.cands.sub <- hold.cands[[j]]
               hold.cands.sub <- hold.cands.sub[,1:inner.rank[j]]
               SSE <- rep(-99, initial.sample)
               for(r in 1:initial.sample){
                    if(inner.rank[j]==1){
                         index <- hold.cands.sub[r]
                    }
                    else{
                         index <- hold.cands.sub[r,]
                    }
                    xmat <- match(index, df1[,1])
                    smalldata <- df1[xmat,]
                    indexuniv <- smalldata$holdISG[1]
                    universe <- df1[df1$holdISG==indexuniv,]
                    MED <- floor(dim(universe)[1]/2 + .99991)
                    MED[MED==0] <- 1

                    lmsmall <- stats::lm(formula=this.form, data=smalldata, singular.ok=TRUE)                             #    lm
                    predsmall <- stats::predict(lmsmall, data=universe)                                                    # predict

                    errorsmall <- df1[, ycol] - predsmall
                    sserrorsmall <- sort(errorsmall^2)  
                    SSE[r] <- sserrorsmall[MED]
               }     # r

                                if(b.d <= 33){print("", quote = FALSE);print(paste(spacehere,"Section 33",sep=" "),quote=FALSE);
                                      Hmisc::prn(predsmall);Hmisc::prn(SSE)       }

               hold.cands.sub <- cbind(hold.cands.sub, SSE)
               lastcol <- dim(hold.cands.sub)[2]
               hold.cands.sub <- hold.cands.sub[order(hold.cands.sub[,lastcol]),]
               uu <- hold.cands.sub[,-lastcol]

               if(inner.rank[j]==1) uu <- matrix(uu,ncol=1)

               SSErim <- c(SSErim,uu[1,])
               rimout.ls[[j]] <- sort(uu[1,])
          }    #  j 
          rimout <- sort(SSErim)
          nsubs <- length(df1.ls)
          temprims <- rimout
     }      #   yesfactor
     else{
          # hold.cands must be a matrix with last column of 0
          MED <- floor(nobs/2 + 0.00001)             # no factors, so must have at least inner.rank = 2
          for(i in 1:initial.sample){
               index <- hold.cands[i,]         # ith row is set of observation numbers, not row numbers
               dropone <- length(index) 
               index <- index[-dropone]
               smalldata <- df1[index, ]
               this.form <- formulaA

               lmsmall <- stats::lm(formula=this.form, data=smalldata, singular.ok=TRUE)                             #    lm
               predsmall <- stats::predict(lmsmall, data=df1)                                                    # predict 

               errorsmall <- df1[, ycol] - predsmall
               sserrorsmall <- sort(errorsmall^2)  
               SSE[i] <- sserrorsmall[MED]
          }     # i
          hold.cands <- cbind(hold.cands, SSE)
          lastcol <- dim(hold.cands)[2]
          hold.cands <- hold.cands[order(hold.cands[,lastcol]),]
          rimout2 <- hold.cands[1,]
          rimout <- rimout2[-lastcol]
          lastcol <- lastcol-1
          rimout <- rimout[-lastcol]
          nsubs <- length(df1.ls)
          temprims <- rimout
          rimout.ls <- NULL
     }     #   no factors present

                               if(b.d <= 35){print("", quote = FALSE);print(paste(spacehere,"Section 35",sep=" "),quote=FALSE);
                                      Hmisc::prn(rimout);Hmisc::prn(rimout.ls)       }

    allrims <- list(rimout, rimout.ls)
# print("leaving aStep1")

    return(allrims)
}

Any scripts or data that you put into this service are public.

forsearch documentation built on April 4, 2025, 5:52 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

forsearch
Diagnostic Analysis Using Forward Search Procedure for Various Models

R/aStep1.R
In forsearch: Diagnostic Analysis Using Forward Search Procedure for Various Models

Try the forsearch package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

forsearch Diagnostic Analysis Using Forward Search Procedure for Various Models

R/aStep1.R In forsearch: Diagnostic Analysis Using Forward Search Procedure for Various Models

Try the forsearch package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

forsearch
Diagnostic Analysis Using Forward Search Procedure for Various Models

R/aStep1.R
In forsearch: Diagnostic Analysis Using Forward Search Procedure for Various Models