R/response_qc.R

#' @title QC for duplicates.
#'
#' @description Finds and flags duplicate responses.
#'
#' @details
#' Finds and flags duplicate responses
#' Removes only the second instance of a duplicate
#' Passes back removed responses
#' Dependent on CRM column naming conventions
#' Judges duplicates based on 'ClientCandidateId', 'Form',
#' 'AttemptNumber', & 'ItemName'
#' Duplicates flag column is 'duplicate'.
#'
#' @param responses The responses dataframe.
#' @return The responses dataframe tagged for duplicates.
#' @examples qc.duplicates(ResponseImport)
#' @family Response QC functions
qc.duplicates <- function(responses) {
  responses$duplicate <- duplicated(responses[,c("ClientCandidateId", "Form","AttemptNumber", "ItemName")])

  return(responses)
}

#' @title QC for invalid records.
#'
#' @description Finds and flags invalid records.
#'
#' @details
#' Counts test taker responses.
#' If a testaker has > median count OR < median count -10,
#' all their responses are flagged.
#' Invalid records flag column is 'invalidRecord'.
#'
#' @param responses The responses dataframe.
#' @return The responses dataframe tagged for invalid records.
#' @examples qc.invalid_records(ResponseImport)
#' @family Response QC functions
qc.invalid_records <- function(responses) {
  ResponseTemp <- responses
  ResponseTemp$count <- 1

  UserAggregate <- aggregate(x = list(Count = ResponseTemp$count),
                             by = list(User = ResponseTemp$ClientCandidateId, Form = ResponseTemp$Form,
                                       Attempt = ResponseTemp$AttemptNumber),
                             "sum")

  UserAggregate$invalidRecord <- ifelse(UserAggregate$Count > median(UserAggregate$Count), 1,
                                          ifelse(UserAggregate$Count < median(UserAggregate$Count)-10, 1, 0))


  responses <- merge(responses, UserAggregate[,c("User", "Form","Attempt","invalidRecord")], by.x = c("ClientCandidateId", "Form","AttemptNumber"),
                          by.y = c("User", "Form", "Attempt"), all.x = TRUE)
  return(responses)
}

#' @title QC for invalid scores.
#'
#' @description Finds and flags invalid scores.
#'
#' @details
#' Sums test taker response scores.
#' If a testtakers official and counted score do not match,
#' all their responses are flagged.
#' Invalid score flag column is 'invalidScore'.
#'
#' @param responses The responses dataframe.
#' @return The responses dataframe tagged for invalid scores.
#' @examples qc.invalid_scores(ResponseImport)
#' @family Response QC functions
qc.invalid_scores <- function(responses) {
  ResponseTemp <- subset(ResponseImport, ResponseImport$ItemStatus == "Scored")

  UserScore <- aggregate(x = list(CalcScore = ResponseTemp$ItemScore),
                         by = list(User = ResponseTemp$ClientCandidateId, Form = ResponseTemp$Form,
                                   Attempt = ResponseTemp$AttemptNumber, ProvScore = ResponseTemp$ExamRawScore), "sum")

  UserScore$invalidScore <- with(UserScore, ifelse(CalcScore != ProvScore, 1, 0))

  ResponseImport <- merge(ResponseImport, UserScore[,c("User", "Form", "Attempt","invalidScore")], by.x = c("ClientCandidateId", "Form","AttemptNumber"),
                          by.y = c("User", "Form","Attempt"), all.x = TRUE)
  return(ResponseImport)
}

#' @title QC for omissions.
#'
#' @description Finds and flags frequent ommissions.
#'
#' @details
#' Counts number of ommissions for each test taker.
#' If the number of test taker ommissions is >= 10,
#' all responses by that test taker are flagged.
#' Omits flag column is 'omits'.
#'
#' @param responses The responses dataframe.
#' @return The responses dataframe tagged for ommissions.
#' @examples qc.omits(ResponseImport)
#' @family Response QC functions
qc.omits <- function(responses) {
  ResponseTemp <- subset(responses, is.na(responses$CandidateResponse))
  ResponseTemp$count <- 1

  UserOmits <- aggregate(x = list(Count = ResponseTemp$count),
                         by = list(User = ResponseTemp$ClientCandidateId, Form = ResponseTemp$Form,
                                   Attempt = ResponseTemp$AttemptNumber), "sum")

  UserOmits$omits <- ifelse(UserOmits$Count >= 10, 1, 0)

  responses <- merge(responses, UserOmits[,c("User", "Form", "Attempt","omits")], by.x = c("ClientCandidateId", "Form","AttemptNumber"),
                          by.y = c("User", "Form","Attempt"), all.x = TRUE)
  responses$omits[is.na(responses$omits)] <- 0

  return(responses)
}

#' @title QC for outliers.
#'
#' @description Finds and flags outlier scores.
#'
#' @details
#' Calculates Inter-Quartile Range for official test taker scores.
#' Flags all responses for test-takers with outlier scores
#' (<= lower-mid quartile - 1.5*IQR OR
#' >= upper-mid quartile + 1.5*IQR).
#' Outlier flag column is 'outlier'.
#'
#' @param responses The response dataframe.
#' @return The responses dataframe tagged for outliers.
#' @examples qc.outliers(ResponseImport)
#' @family Response QC functions
qc.outliers <- function(responses) {
  ScoreOutliers <- unique(ResponseImport[,c("ClientCandidateId", "Form","AttemptNumber", "ExamRawScore")])

  scorequantiles <- quantile(ScoreOutliers$ExamRawScore)
  IQR <- scorequantiles[4] - scorequantiles[2]

  ScoreOutliers$outlier <- with(ScoreOutliers, ifelse(ExamRawScore <= scorequantiles[2] - (1.5 * IQR), 1,
                                                      ifelse(ExamRawScore >= (1.5 * IQR) + scorequantiles[4], 1, 0)))


  ResponseImport <- merge(ResponseImport, ScoreOutliers[,c("ClientCandidateId", "Form","AttemptNumber","outlier")],
                          by = c("ClientCandidateId", "Form","AttemptNumber"), all.x = TRUE)

  return(responses)
}

#' @title Run all response QC procedures.
#'
#' @description Complete all QC procedures.
#'
#' @details Calls all 5 QC procedures on response dataframe.
#'
#' @param responses The response dataframe.
#' @return The responses dataframe tagged for all QC issues.
#' @examples
#' head(ResponseImport) #A responses dataframe from a CRM import
#' ResponseImport <- qc.run(ResponseImport)
#' badResponses <- qc.archive(ResponseImport)
#' ResponseImport <- qc.clean(ResponseImport)
#' @family Response QC functions
qc.run <- function(responses) {
  responses <- qc.outliers(
                qc.omits(
                  qc.invalid_scores(
                    qc.invalid_records(
                      qc.duplicates(
                        responses
                      )
                    )
                  )
                )
              )
  responses$qcResult <- rowSums(ResponseImport[,
                            c("duplicate", "invalidRecord", "invalidScore",
                              "omits", "outlier")], na.rm = TRUE)
  return(responses)
}

#' @title Archive flagged responses
#'
#' @description Creates subset of flagged responses.
#'
#' @details Creates a subset of the response dataframe including only
#' those responses which were flagged in the QC process.
#'
#' @param responses The response dataframe.
#' @return Subset of responses dataframe with only flagged responses.
#' @examples
#' head(ResponseImport) #A responses dataframe from a CRM import
#' ResponseImport <- qc.run(ResponseImport)
#' badResponses <- qc.archive(ResponseImport)
#' ResponseImport <- qc.clean(ResponseImport)
#' @family Response QC functions
qc.archive <- function(responses) {
  return(subset(responses, responses$qcResult >= 1))
}

#' @title Clean flagged responses.
#'
#' @description Remove all flagged responses.
#'
#' @details Removes all responses flagged in the qc process
#' from the response dataframe.
#' Removes QC flag columns.
#'
#' @param responses The response dataframe.
#' @return The responses dataframe cleaned of all QC issues.
#' @examples
#' head(ResponseImport) #A responses dataframe from a CRM import
#' ResponseImport <- qc.run(ResponseImport)
#' badResponses <- qc.archive(ResponseImport)
#' ResponseImport <- qc.clean(ResponseImport)
#' @family Response QC functions
qc.clean <- function(responses) {
  responses <- subset(responses, responses$qcResult == 0)
  return(responses[, -c('duplicate', 'invalidRecord', 'invalidScore', 'omits', 'outlier', 'qcResult')])
}
m070ch/ips.tools documentation built on May 18, 2019, 8:09 p.m.