R/switch.items.R

#' Match values for vector items
#'
#' Match each item in a vector to a source column and return the corresponding target column as a vector.
#' The output of this function is identical to:
#' as.character( sapply(lookup, FUN = function(x) ref[ref[,source_index] == x, target_index]) )
#' but faster for large datasets.
#' @param lookup A vector containing values to replace
#' @param ref The reference dataframe containing the source items to match against and the target items to be returned
#' @param source_index The index of the column containing the items to match against
#' @param target_index The index of the column containing the items to return
#' @param multiple_targets Action in case of 1:many mapping between source and target items.
#' Valid options are "omit" (default) to remove the item, "keep" to keep the lookup item, "first" to use the first target item or "last" to use the last target item.
#' @param no_targets Action if no matching value is found in source index.
#' Valid options are "omit" (default) or "keep" to keep the lookup item.
#' @keywords lookup
#' @export
#' @examples
#' # Example using a random dataset with categorical data:
#' devtools::install_github('martinry/randomcds')
#' df <- randomcds::cds(50, 4) # Generate a dataframe with 50 rows and 4 columns
#' rv <- df[sample.int(50, 10),1] # Create a vector containing a subset (10 items) from the first column in df
#' rv2 <- qob::switch.items(rv, df, 1, 3) # For each item in rv, find the item in df[,1] and return the corresponding item in df[,3]

switch.items <- function (lookup, ref, source_index, target_index, multiple_targets = "omit", no_targets = "omit") {
    output <- lookup
    seen   <- c()
    unmapped_many <- vector()
    unmapped_none <- vector()

    for (i in 1:length(lookup))
    {
        source_item <- as.character(lookup[i])
        if (source_item %in% seen) {
            next
        }
        else {

            if(source_item %in% ref[,source_index]) {

                target_item <- as.character(ref[ref[, source_index] == source_item, target_index])

                if (length(target_item) == 1) {
                    output[output == source_item] <- target_item
                } else {
                    unmapped_many <- c(unmapped_many, source_item)
                    if(multiple_targets == "omit"){
                        output <- output[-(output == source_item)]
                    } else if(multiple_targets == "first") {
                        output[output == source_item] <- target_item[1]
                    } else if(multiple_targets == "last") {
                        output[output == source_item] <- target_item[length(target_item)]
                    } else if (multiple_targets == "NA") {
                        output[output == source_item] <- NA
                    } else {
                        output[output == source_item] <- multiple_targets
                    }

                }

            } else {
                if(is.na(source_item)){
                    print("NAAA")
                # Action if no match is found
                } else {
                    unmapped_none <- c(unmapped_none, source_item)
                    if(no_targets == "omit"){
                        output <- output[!(output == source_item)]
                    } else if (no_targets == "keep") {

                    } else if (no_targets == "NA") {
                        output[output == source_item] <- NA
                    } else {
                        output[output == source_item] <- no_targets
                    }
                }

            }

            seen <- c(seen, source_item)
        }
    }

    #print(paste("1:many targets for:", paste(unmapped_many, collapse = " ")))
    #print(paste("No targets for: ", paste(unmapped_none, collapse = " ")))

    return (output)
}
martinry/qob documentation built on Aug. 30, 2019, 1:14 p.m.