# R/effort_merge.R

#' Merges multiple files that had title/abstract screening efforts 
#' distributed across a team.  
#'
#' Combines (merges) multiple effort_*.csv files within the same directory that 
#' represent the completed screening efforts of multiple team members.  These
#' files were originally generated with \code{\link{effort_distribute}}.
#'
#' @details Only files whose name contains "effort_" and ends in ".csv" are
#'    read.  When \code{reviewers} is supplied, the names are collapsed with 
#'    "|" and used as a regular expression against the file names, so a file 
#'    is kept when any reviewer name occurs anywhere in its name.
#'
#'    When \code{dual = TRUE}, every file with a column name containing 
#'    "REVIEWERS_A" is treated as a team A file and all remaining files as 
#'    team B files.  Each team A file is paired with the single team B file 
#'    holding the same set of STUDY_ID values (row order is ignored), the pair
#'    is merged on all of their shared columns keeping unmatched rows, and the
#'    merged pairs are then row-bound.
#'
#' @param directory The directory name for the location of multiple .csv files. 
#'    Assumes the current working directory if none is explicitly called.
#'    File names must include the "effort_" string as originally generated by
#'    \code{\link{effort_distribute}}.
#' @param reviewers A vector of reviewer names (strings) used to merge effort 
#'    from a select group of team members.  Must be an even collection (e.g., 
#'    pairs of reviewers) when a dual design was implemented.
#' @param dual When \code{TRUE}, merges files implementing a dual screening
#'    design.  
#'
#' @return A single data.frame merged from multiple files.  Problems (no 
#'    matching files, a missing STUDY_ID column or an unpaired file in a dual
#'    design) are reported through the package's internal error handler.
#'
#' @examples \dontrun{
#'
#' data(example_references_metagear)
#' theTeam <- c("Christina", "Luc")
#' # warning effort_distribute below, will save two files to working 
#' # directory: effort_Christina.csv and effort_Luc.csv
#' effort_distribute(example_references_metagear, initialize = TRUE, 
#'                   reviewers = theTeam, save_split = TRUE)
#' effort_merge()
#' }
#'
#' @seealso \code{\link{effort_initialize}}, \code{\link{effort_distribute}}, 
#'    \code{\link{effort_summary}}
#'
#' @importFrom utils read.csv
#' @export effort_merge

effort_merge <- function (directory = getwd(),
                         reviewers = NULL, 
                         dual = FALSE) {
  
  # the extension dot is escaped and anchored to the end of the name so that
  # look-alikes such as "effort_x.csv.bak" or "effort_x_csv.txt" are skipped
  aFileList <- list.files(path = directory, pattern = "effort_.*\\.csv$")
  
  # reviewer names act as an alternation regex on the file names; with no
  # reviewers requested every effort file is kept
  if (length(reviewers) > 0) {
    aFileList <- aFileList[grepl(paste(reviewers, collapse = "|"), aFileList)]
  }
  
  if (length(aFileList) == 0) {
    # collapse to one message: name the reviewers asked for, or the directory 
    # searched when no reviewers were given
    searchedFor <- if (length(reviewers) > 0) {
      paste(reviewers, collapse = ", ")
    } else {
      directory
    }
    .metagearPROBLEM("error",
                     paste("Could not find effort_*.csv file from",
                           searchedFor))
  }
  
  dataFrameList <- lapply(aFileList, 
                          function(aFile) read.csv(file = file.path(directory, 
                                                                    aFile), 
                                                   header = TRUE))
    
  if (dual == TRUE) {
    
    # pairing relies on STUDY_ID, so report files lacking it up front
    hasStudyID <- vapply(dataFrameList, 
                         function(aDataFrame) "STUDY_ID" %in% names(aDataFrame),
                         logical(1))
    if (!all(hasStudyID)) {
      .metagearPROBLEM("error", 
                       paste("missing STUDY_ID column in",
                             paste(aFileList[!hasStudyID], collapse = ", ")))
    }
    
    # group all team A and B members: team A files carry a REVIEWERS_A column
    isTeamA <- vapply(dataFrameList, 
                      function(aDataFrame) {
                        any(grepl("REVIEWERS_A", names(aDataFrame)))
                      },
                      logical(1))
    team_A <- dataFrameList[isTeamA]
    team_B <- dataFrameList[!isTeamA]
    if (length(team_A) != length(team_B)) {
      .metagearPROBLEM("error", "uneven number of team members")
    }
    
    # merge each team A member with the one team B member that screened the
    # same set of studies
    mergedPairs <- lapply(team_A, function(memberA) {
      isPartner <- vapply(team_B, 
                          function(memberB) setequal(memberA[["STUDY_ID"]], 
                                                     memberB[["STUDY_ID"]]),
                          logical(1))
      
      # zero partners would silently drop this member's rows and several
      # partners would column-bind unrelated files, so insist on exactly one
      if (sum(isPartner) != 1) {
        .metagearPROBLEM("error", 
                         paste("could not pair each team A file with exactly",
                               "one team B file sharing the same STUDY_ID",
                               "values"))
      }
      
      merge(memberA, team_B[[which(isPartner)]], all = TRUE)
    })
    
    return(Reduce(rbind, mergedPairs))
  } 
  
  Reduce(rbind, dataFrameList)
}

# Try the metagear package in your browser
#
# Any scripts or data that you put into this service are public.
#
# metagear documentation built on Feb. 15, 2021, 5:09 p.m.