R/sample_ids.r

#' Sampling from panel data
#'
#' @param df Panel data generated by \code{\link{read_panel_id}()}.
#' @param .n Number of samples to be extracted
#' @param .id_var Default "Panel_ID".
#' @param sent_id 
#' @param finished_id 
#' @param include_sent 
#' @param show 
#'
#' @return character vector of extracted IDs.
#' @import dplyr
#'
#' @examples
#' sent_id <- EOLembrainToolbox::read_ids("./exclude_id")
#' finished_id <- c(EOLembrainToolbox::read_ids("./exclude_id/finished_ID"),
#'                  EOLembrainToolbox::eol_report_crawler(survey_id)$Panel_ID)
#' panel_id_all <- read_panel_id()
#' panel_id <- panel_id_all %>% filter(active_grade %in% 1:2)  # active, semi-active
#' panel_id %>%
#' filter(gender %in% 1, age %in% 20:29) %>%
#'   sample_N(2000, sent_id = sent_id, finished_id = finished_id, include_sent = T)
#' 
#' 
#' @export
sample_N <- function(df, .n, .id_var="Panel_ID",
                     sent_id = NULL,
                     finished_id = NULL,
                     include_sent = FALSE,
                     show = TRUE) {
  
  if(.n == 0) {
    if(show == TRUE) cat(paste0("(\u6c92\u6709\u62bd\u6a23)", "\n"))
    return(invisible(NULL))
  }
  
  if(length(finished_id) == 0 & include_sent == TRUE)
    stop("When `include_sent` set to TRUE, length of `finished_id` must not be 0.",
         call. = FALSE)
  
  if(include_sent == FALSE) {
    exclude_id <- c(sent_id, finished_id)
  } else exclude_id <- finished_id
  
  ## exclude id
  filter_criteria <- lazyeval::interp(~ ! id_var %in% exclude_id,
                                      id_var = as.name(.id_var))
  df <- df %>% filter_(filter_criteria)
  
  if(nrow(df) == 0) {
    if(show == TRUE) cat(paste0("(\u6b64\u689d\u4ef6\u5df2\u7121\u6703\u54e1)", "\n"))
    return(invisible(NULL))
  }
  
  include_sent_info <- NULL   # message: 包含已發送
  if(include_sent == TRUE) include_sent_info <- "(\u542b\u5df2\u767c\u9001)"
  
  less_info <- NULL   # message: 缺額
  if(.n > nrow(df)) {
    .n <- nrow(df)
    less_info <- "(\u7f3a\u984d)"
  }
  
  sample_id <- df %>% # exclude id
    dplyr::sample_n(., size = .n, replace = FALSE) %>%
    dplyr::select_(.id_var) %>% unlist %>% unname
  
  if(show == TRUE) {
    cat(paste0(scales::comma(length(sample_id)),
               "\t\u500bID\u88ab\u62bd\u51fa",
               include_sent_info,
               less_info,
               "\n"))
  }
  
  sample_id
}
leoluyi/EOLembrainToolbox documentation built on May 21, 2019, 5:08 a.m.