R/qmboots.R

Defines functions qmboots

Documented in qmboots

qmboots <- function(dataset, nfactors, nsteps, load="auto", rotation="varimax", indet="qindtest", fsi=TRUE, forced=T, distribution=NULL, cor.method="pearson", ...) {
  warning("Note that bootstrapping is an advanced technique in Q methodology, see the help page and relevant reference.")
  if(forced == F) warning("Because the data are specified as non-forced, it's advisable to focus the interpretation on the z-scores (rather than on the factor scores)")
  startime <- Sys.time()
  nstat <- nrow(dataset)
  nqsorts <- ncol(dataset)
  # Number of iterations requested
  itnumber <- paste("Number of requested iterations:", nsteps)
  print(itnumber, quote=FALSE)
  
  #-----------------------------------------------
  # A. create objects and slots for the results
  #-----------------------------------------------
  #bootstrap results
  qmbr <- vector("list", nfactors)
  names(qmbr) <- paste0("factor_", 1:nfactors)
  n <- 1
  while (n <= nfactors) {
    qmbr[[n]] <- list()
    qmbr[[n]]$flagged <- data.frame(matrix(as.logical(NA), nrow=nqsorts, ncol=nsteps, 
                                           dimnames=list(colnames(dataset), 
                                                         paste0("step_",1:nsteps))))
    qmbr[[n]]$zsc     <- data.frame(matrix(as.numeric(NA), nrow=nstat, ncol=nsteps, 
                                           dimnames=list(row.names(dataset), 
                                                         paste0("step_",1:nsteps))))
    qmbr[[n]]$loa     <- data.frame(matrix(as.numeric(NA), nrow=nqsorts, ncol=nsteps, 
                                           dimnames=list(colnames(dataset), 
                                                         paste0("step_",1:nsteps))))
    n <- n+1
  }
  #indeterminacy tests results
  if (indet == "procrustes") {
  } 
  if (indet == "qindtest" | indet == "both") {
    #create dataframe for results and reports from INDETERMINACY issue
    qmts <- list()
    qmts[[1]] <- data.frame(matrix(NA, nrow=nfactors, ncol=nsteps, 
                                   dimnames=list(paste0("f",c(1:nfactors)), paste0("order_",1:nsteps))))
    qmts[[2]] <- data.frame(matrix(NA, nrow=nfactors, ncol=nsteps, 
                                   dimnames=list(paste0("f",c(1:nfactors)), paste0("sign_",1:nsteps))))
    names(qmts) <- c("torder", "tsign")
    qmts_log <- list()
    qmts_log[[1]] <- data.frame(matrix(NA, nrow=1, ncol=nsteps, dimnames=list("log", paste0("log_ord_",1:nsteps))))
    qmts_log[[2]] <- data.frame(matrix(NA, nrow=1, ncol=nsteps, dimnames=list("log", paste0("log_sig_",1:nsteps))))
    names(qmts_log) <- c("torder", "tsign")
  }
  
  #-----------------------------------------------
  # B. full sample Q method loadings (target matrix)
  #-----------------------------------------------
  #use manually introduced matrix of factor loadings as target, if any
  if (is.matrix(load) | is.data.frame(load)) {
    if ((nrow(load) == nqsorts) & (ncol(load) == nfactors)) {
      flagged <- qflag(loa=load, nstat=nstat)
      qm <- qzscores(dataset, nfactors, flagged=flagged, loa=load)
      target <- load
      colnames(target) <- paste0("target_f", 1:nfactors)
    } else stop("Q method input: The factor loading matrix provided in 'load' should have the correct number of Q-sort as rows and the correct number of rotated factors as columns.")
  } else if (is.character(load) & length(load) == 1) {
    if (load == "auto") {
      qm <- qmethod(dataset, nfactors, rotation=rotation, forced=forced, distribution=distribution, cor.method=cor.method, ...)
      flagged <- qm$flagged
      target <- as.matrix(qm$loa)
      colnames(target) <- paste0("target_f", 1:nfactors)
    } else stop("Q method input: 'load' has to be either 'auto' or a matrix.")
  }
  #create vector to build resamples
  #list of number of Q sorts, repeated as many times as bootstrap replications, to ensure that each Q-sort appears the same amount of times
  qsvector <- rep(1:nqsorts, times=nsteps)
  #generate a random order for the previous list
  qsrand <- sample(1:(nqsorts*nsteps), size=(nqsorts*nsteps), replace=FALSE)
  #order qsvector randomly
  qsvector <- qsvector[qsrand]
  
  #-----------------------------------------------
  # C. actual bootstrap
  #-----------------------------------------------
  it_count <- 1
  while (it_count <= nsteps) {
    qp <- 1 + (nqsorts*(it_count-1))
    # Create bootstrap resample
    subdata <- dataset[ , qsvector[c(qp:(qp+(nqsorts-1)))]]
    # Reshape target matrix of original factor loadings
    subtarget <-   target[qsvector[c(qp:(qp+(nqsorts-1)))],]
    # Full bootstrap step
    step_res <- qbstep(subdata=subdata, subtarget=subtarget, 
                       indet, nfactors, nqsorts, nstat, 
                       qmts=qmts, qmts_log=qmts_log, 
                       flagged=flagged, forced=forced, 
                       distribution=distribution, cor.method=cor.method, ...)
    # Export essential results: flagged, zsc and loa
    for (n in 1:nfactors) {
      # z-scores
      qmbr[[n]][["zsc"]][paste0("step_",it_count)] <- step_res$zsc[[n]]
      # Flagged Q-sorts and factor loadings -- important to assign to the correct columns!
      for (r in 1:nqsorts) {
        if (sum(rownames(qmbr[[n]][[3]])[r] == names(subdata)) != 0) { #if the Q sort is in the resample...
          qmbr[[n]][["flagged"]][r,it_count] <- step_res$flagged[[n]][which(rownames(qmbr[[n]][["flagged"]])[r] == names(subdata))]
          qmbr[[n]][["loa"]][r,it_count]    <- step_res$loadings[[n]][which(rownames(qmbr[[n]][["loa"]])[r] == names(subdata))]
        }
        colnames(qmbr[[n]][["flagged"]])[it_count] <- paste0("step_",it_count)
        colnames(qmbr[[n]][["loa"]])[it_count]     <- paste0("step_",it_count)
      }
    }
    # Export results of indeterminacy correction, if any
    if (indet == "both" | indet == "qindtest") {
      # Test results (logical)
      qmts[[1]][paste("order_",it_count, sep="")] <- step_res[[4]]
      qmts[[2]][paste("sign_",it_count, sep="")]  <- step_res[[5]]
      # Reports of solution implementation
      qmts_log[[1]][paste("log_ord_",it_count, sep="")] <- step_res[[6]]
      qmts_log[[2]][paste("log_sig_",it_count, sep="")] <- step_res[[7]]
    }
    # Count the iteration
    it_msg <- paste("Finished iteration number", it_count)
    print(it_msg, quote=FALSE)
    it_count <- it_count + 1
  }
  #---actual bootstrap ends here
  #-----------------------------------------------
  # D. Export indeterminacy results into one object
  #-----------------------------------------------
  qindet <- list()
  if (indet == "none") {
    qindet <- "Caution: no correction of PCA bootstrap indeterminacy issue was performed. This may introduced inflated variability in the results"
  }
  if (indet == "procrustes") {
    qindet <- "Procrustes rotation from MCMCpack was used to solve the PCA indeterminacy issue ('alignment problem')"
  }
  if (indet == "qindtest" | indet == "both") {
    qindet[[1]] <- qmts
    qindet[[2]] <- qmts_log
  }
  
  #-----------------------------------------------
  # E. Test which steps could not be swap corrected
  #-----------------------------------------------
  if (indet == "qindtest" | indet == "both") {
    errmsg <- "ERROR in ORDER swap: at least one factor in the resample is best match for two or more factors in the target"
    badsteps <- which(qindet[[2]][[1]] == errmsg)
    print(paste("Number of steps with order swap error:", 
                length(badsteps)))
    if (sum(qindet[[2]][[1]] == errmsg) == 0) badsteps <- 0
  } else badsteps <- 0
  
  #-----------------------------------------------
  # F. Summary stats for z-scores of bootstrap
  #-----------------------------------------------
  qmbs <- list() # Q method bootstrap summary
  #- - - - - - - - - - - - - - - - - - - - - - - -
  # 1. z-scores
  for (n in 1:nfactors) {
    if (sum(badsteps) > 0 & (indet == "qindtest" | indet == "both")) {
      t.zsc <- qmbr[[n]]$zsc[,-badsteps]
    } else t.zsc <- qmbr[[n]]$zsc
    qmbs[[n+1]] <- merge(describe(t(t.zsc)), 
                         t(apply(t.zsc, 1, quantile, 
                                 probs=c(0.025, 0.25, 0.75, 0.975), 
                                 na.rm=TRUE)), 
                         by='row.names', sort=FALSE)
    rownames(qmbs[[n+1]]) <- qmbs[[n+1]][,1]
    qmbs[[n+1]][,1] <- NULL
  }
  names(qmbs) <- paste0("factor", 0:nfactors)
  #- - - - - - - - - - - - - - - - - - - - - - - -
  # 2. Factor scores (fragment adapted from qzscores.R)
  if (forced==T) qscores <- sort(dataset[,1], decreasing=FALSE)
  if (forced==F) qscores <- distribution
  
  # Build frame for factor scores
  zsc_mea <- data.frame(zsc_mea=c(1:nstat), row.names=row.names(dataset))
  zsc_bn <- data.frame(zsc_bn=c(1:nstat), row.names=row.names(dataset))
  for (f in 1:nfactors) zsc_mea[,f] <- qmbs[[f+1]]$mean
  colnames(zsc_mea) <- paste0("zsc_mea_f", c(1:nfactors))
  for (f in 1:nfactors) {
    for (s in 1:nstat) {
      # Find which statement has the current qscore rank
      statement <- order(zsc_mea[,f])[[s]]
      zsc_bn[statement,f] <- qscores[[s]]
    }
  }
  colnames(zsc_bn) <- paste0("fsc_f", c(1:nfactors))
  qmbs[[1]] <- zsc_bn
  names(qmbs)[1] <- c("Bootstraped factor scores")
  
  #-----------------------------------------------
  # G. Summary stats for Q-sort factor loadings of bootstrap
  #-----------------------------------------------
  qmbl <- list()
  for (n in 1:nfactors) {
    if (sum(badsteps) > 0 & indet == "qindtest" | indet == "both") {
      t.fla <- qmbr[[n]]$flagged[,-badsteps]
      t.loa <- qmbr[[n]]$loa[,-badsteps]
    } else {
      t.fla <- qmbr[[n]]$flagged
      t.loa <- qmbr[[n]]$loa
    }
    # Factor loadings
    qmbl[[n]] <-   merge(describe(t(t.loa)), 
                         t(apply(t.loa, 1, quantile, 
                                 probs=c(0.025, 0.25, 0.75, 0.975), 
                                 na.rm=TRUE)), 
                         by='row.names', sort=FALSE)
    rownames(qmbl[[n]]) <- qmbl[[n]][,1]
    qmbl[[n]][,1] <- NULL
    # Frequency of flagging
    qmbl[[n]]$flag_freq <- rowMeans(t.fla, na.rm=T)
    for (j in 1:nqsorts) {
      if (sum(badsteps) > 0 & indet == "qindtest" | indet == "both") {
        token <- t(qmbr[[n]][[1]][,-badsteps])[,j]
      } else token <- t(qmbr[[n]][[1]])[,j]
      qmbl[[n]][j,"flag_freq"] <- length(which(token == TRUE)) / (length(which(token == TRUE)) + length(which(token == FALSE)))
    }
    qmbl[[n]] <- qmbl[[n]][order(row.names(qmbl[[n]])), ]
    n <- n+1
  }
  
  names(qmbl) <- paste0("factor", 1:nfactors)
  #-----------------------------------------------
  # H. Export everything and report
  #-----------------------------------------------
  qmboots <- list()
  qmboots[[1]] <- qmbs
  qmboots[[2]] <- qmbr
  qmboots[[3]] <- qindet
  qmboots[[4]] <- as.data.frame(matrix(qsvector, nrow=nqsorts, 
                                       dimnames=list(NULL, paste0("bsampl_", 1:nsteps))))
  qmboots[[5]] <- qm
  qmboots[[6]] <- qscores
  qmboots[[7]] <- qmbl
  if (fsi == TRUE) {
    # Calculate FACTOR STABILITY INDEX
    fsii <- qfsi(nfactors=nfactors, nstat=nstat, qscores=qscores, zsc_bn=zsc_bn, qm=qm)
    qmboots[[8]] <- fsii
    names(qmboots) <- c("zscore-stats", "full.bts.res", "indet.tests", "resamples", "orig.res", "q.array", "loa.stats", "fsi")
  } else {
    names(qmboots) <- c("zscore-stats", "full.bts.res", "indet.tests", "resamples", "orig.res", "q.array", "loa.stats")
  }
  fintime <- Sys.time()
  duration <- paste(format(floor(difftime(fintime, startime, units="hours")[[1]]), width=2),":", format(floor(difftime(fintime, startime, units="mins")[[1]] %% 60), width=2),":", format(floor(difftime(fintime, startime, units="secs")[[1]] %% 60), width=2), sep="")
  
  cat("-----------------------------------------------\nBootstrap of ",nsteps," steps\nCall: qmboots(nfactors=",nfactors, ", nstat=", nstat, ", nqsorts=",nqsorts, ", nsteps=",nsteps, ", load=", load, ", rotation=", rotation, ", indet=",indet, ", fsi=",fsi,") \n-----------------------------------------------\nSTARTED : ",format(startime)," \nFINISHED: ", format(Sys.time()), "\nDURATION:            ", duration, " hrs:min:sec","\n-----------------------------------------------\nAlignment correction method: ",indet,"\nNumber of steps that could not be reordered: ",length(badsteps), "\n", sep="")
  
  invisible(qmboots)
}
#TO-DO: set printing method to show the bootstrapped z-scores, factor scores and stability indices

Try the qmethod package in your browser

Any scripts or data that you put into this service are public.

qmethod documentation built on April 1, 2023, 12:18 a.m.