R/aStep2.R

aStep2 <-
function (yesfactor, form.A2, finalm, rimbs, onlyfactor=FALSE, dfa2, finalm.ls=NULL, ycol, mstart, rnk, b.d) 
{

     #                                            aStep2 
     #
     # VALUE        A list of 4 elements. First element contains an updated list of the rim during Step 2 
     #                 Second element of the primary list is saved lm output for subsequent extraction of statistics. 
     #                 If there are no factors, the first list contains only the first element above. 
     #
     # INPUT yesfactor         True or False for presence of factors
     #       form.A2           Formula for analysis of entire dataset
     #       finalm            See VALUE above. finalm argument is the same but only for Step 1 values
     #       rimbs             List, each element is a complete matrix of obs numbers and corresponding subset codes
     #       dfa2              Data frame being analyzed by forward search. 
     #       finalm.ls         List of rim by factor subset, if yesfactor; otherwise, NULL
     #       ycol              Response column number, including 1 for Observation
     #       mstart            First subset to be defined
     #       rnk               Rank of X matrix. For factors, this is rank with factors removed.
     #       b.d               Number at which to begin diagnostic listings
     #
     spacer <- "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX        aStep2               "
     nfacts <- length(rimbs)
     countSubs <- rep(-99, nfacts)
     for(i in 1:nfacts){
          temprim <- finalm.ls[[mstart-1]]
          countSubs[i] <- length(temprim[[i]])
     }     # i

                                 if(b.d <= 60){print("", quote = FALSE);print(paste(spacer,"Section 60",sep=" "),quote=FALSE);
                                      Hmisc::prn(form.A2);Hmisc::prn(rimbs);Hmisc::prn(countSubs)       }

     nobs <- dim(dfa2)[1]

     fooResult <- vector("list", nobs)
     fooResult[[nobs]] <- stats::lm(formula=form.A2, data=dfa2, singular.OK=TRUE, x=TRUE, y=TRUE)                          # last lm

     finalm[[nobs]] <- 1:nobs
     predictions.base <- data.frame(Observation <- 1:nobs, Diffs <- rep(-999, nobs))
     names(predictions.base) <- c("Observation", "Diffs2")
     xtemp.list <- vector("list", nobs)
     modCook <- rep(0,nobs)
     param.est <- matrix(0,nrow=rnk, ncol=nobs)
     residuals2 <- matrix(0,nobs,nobs)
     #
     if(yesfactor){
          nlevels <- length(rimbs)
          predictions.base <- data.frame(Observation <- 1:nobs, Subset <- rep("A",nobs), Diffs <- rep(-999, nobs))
          names(predictions.base) <- c("Observation", "Subset", "Diffs2")
          matrimbs <- NULL
          for(ni in 1:length(rimbs)){
               matrimbs <- rbind(matrimbs, rimbs[[ni]])
          }     # ni
          matrimbs <- matrimbs[order(matrimbs[,1]),]
          predictions.base$Subset <- matrimbs[,2]

                                 if(b.d <= 62){print("", quote = FALSE);print(paste(spacer,"Section 62",sep=" "),quote=FALSE);
                                      Hmisc::prn(matrimbs);Hmisc::prn(predictions.base)       }

          ##############################
          # Begin loop creating step 2 #
          ##############################

          for(i in mstart:(nobs-1)){
               predictions <- predictions.base
               rim <- finalm[[i-1]]
               thisdf1 <- dfa2[rim,]

               lmthis <- stats::lm(formula=form.A2, data=thisdf1, singular.OK=TRUE, x=TRUE, y=TRUE)              # lm
               fooResult[[i]] <- lmthis
               preds <- stats::predict(lmthis, dfa2)                                                             # predict

               residuals2[,i] <- preds - dfa2[,ycol]
               predictions[,3] <- (preds - dfa2[,ycol])^2 
               predictions <- predictions[order(predictions[,3]),]
               temppred <- predictions[1:i,]
               temppred <- temppred[order(temppred[,1]),]
               
                           if(b.d <= 69 ){ print("",quote=FALSE);print(paste(spacer,"Section 69",sep=" "),quote=FALSE);
                      #           Hmisc::prn(dfa2[rim,]);Hmisc::prn(rim);
                                print("The following is from step 1:"); Hmisc::prn(dfa2[1:(mstart-1),]);
                                print("The following is the start of step 2:");Hmisc::prn(sort(dfa2[1:(mstart-1),1]));
                                stop("in bStep2 b.d 69")   }

               ##########################################################################
               # Get obs numbers for initial set of countSubs obs in each factor subset #
               ##########################################################################
               collect.final <- NULL
               for(j in 1:nlevels){
                    finalStage <- NULL

                    uu <- predictions[predictions[,2]==names(rimbs)[j],] 
                    firstrnk <- uu[,1]
                    finalStage <- rbind(finalStage,firstrnk[1:countSubs[j] ])
                    collect.final <- c(collect.final, finalStage)
               }      # j

                           if(b.d <= 69 ){ print("",quote=FALSE);print(paste(spacer,"Section 69",sep=" "),quote=FALSE);
                                 Hmisc::prn(dfa2[rim,]);Hmisc::prn(rim);
                                 print("The following is step 1:");Hmisc::prn(dfa2[1:(mstart-1),]);
                                 print("The following is the srart of step 2:");Hmisc::prn(sort(dfa2[1:(mstart-1),1]));          stop("in bStep2 b.d 69")   }

 
              #
               ###################################################
               # Add observation numbers to bring number up to i #
               ###################################################
               remainder <- match(collect.final,predictions[,1])
               remainder.mat <- predictions[-remainder,]

                                 if(b.d <= 70){print("", quote = FALSE);print(paste(spacer,"Section 70",sep=" "),quote=FALSE);
                                      Hmisc::prn(remainder.mat[order(remainder.mat$Subset,remainder.mat$Diffs2),])       }

               nfinal <- length(collect.final)
               needed <- remainder.mat[1:(i - nfinal),]
               needed.1 <- needed[,1]
               finalm[[i]] <- sort(c(collect.final, needed.1))

                                 if(b.d <= 71){print("", quote = FALSE);print(paste(spacer,"Section 71",sep=" "),quote=FALSE);
                                      Hmisc::prn(needed.1);Hmisc::prn(finalm)       }

          }       #  i
          sigma <- sqrt(sum(predictions[,3])/(nobs-rnk))      # here, we're using an overall estimate, not by factor subsets
     }         # factors present    ###########################################################################################
     else{
          for(i in mstart:(nobs-1)){
               predictions <- predictions.base
               rim <- finalm[[i-1]]
               thisdf1 <- dfa2[rim,]

               lmthis <- stats::lm(formula=form.A2, data=thisdf1, singular.OK=TRUE, x=TRUE, y=TRUE)                         # lm
               fooResult[[i]] <- lmthis

               preds <- stats::predict(lmthis, dfa2)
               residuals2[,i] <- preds - dfa2[,ycol]
               predictions[,2] <- (preds - dfa2[,ycol])^2                   # this used to be medaugx[,2]
               predictions <- predictions[order(predictions[,2]),]
               finalm[[i]] <- predictions[1:i,1]

                                 if(b.d <= 72){print("", quote = FALSE);print(paste(spacer,"Section 72",sep=" "),quote=FALSE);

                                      Hmisc::prn(i);Hmisc::prn(thisdf1);Hmisc::prn(predictions);Hmisc::prn(finalm[[i]])       }
          }    #   i
          sigma <- sqrt(sum(predictions[,2])/(nobs-rnk))
     }               # no factors present  

     outlist <- list(finalm, fooResult, residuals2, sigma)

     return(outlist)
}

Try the forsearch package in your browser

Any scripts or data that you put into this service are public.

forsearch documentation built on April 4, 2025, 5:52 a.m.