# R/ABtest.R
#
# Defines functions ABtest_whk_fn

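# 1. Input CSV files must be saved with CUST_NO as column 1 and REALMB as column 2.
# 2. REALMB_YN = c("VVIP","VIP","GOLD","ETC"): the Real Membership grades to extract.
# 3. RANDOM_YN = "Y" samples customers at random regardless of grade; "N" samples with the Real Membership grade taken into account.
# 4. SAMPLE_YN = "Y" makes the A and B sample sizes equal; "N" allows them to differ.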


library(data.table)
library(plyr)
library(dplyr)
library(sampling)


#' Build A/B customer groups and save them as CSV files
#'
#' Cleans both inputs (drops rows with `NA`, drops duplicate rows), removes
#' from `B` every customer whose `CUST_NO` also appears in `A`, keeps only the
#' membership grades listed in `REALMB_YN`, samples the two groups, writes each
#' group to a CSV file (columns `CUST_NO`, `REALMB`, encoding CP949) in the
#' current working directory, prints a per-grade count summary and returns the
#' groups together with that summary.
#'
#' Sampling modes:
#' * `RANDOM_YN = "Y"`, `SAMPLE_YN = "Y"`: the larger group is randomly cut
#'   down to the size of the smaller one, ignoring the grade.
#' * `RANDOM_YN = "Y"`, any other `SAMPLE_YN`: no sampling, groups are kept.
#' * any other `RANDOM_YN`: for every grade present in both groups,
#'   `min(count in A, count in B)` customers are drawn without replacement
#'   from each group; grades missing from either group are dropped.
#'
#' The random seed is fixed with `set.seed(123)` before every draw, so the
#' function changes the state of the global random number generator.
#'
#' @param A,B Data frames with at least the columns `CUST_NO` and `REALMB`.
#'   Both must be supplied.
#' @param REALMB_YN Character vector of membership grades to keep.
#' @param RANDOM_YN `"Y"` to sample without regard to grade; anything else
#'   samples per grade.
#' @param SAMPLE_YN Only used when `RANDOM_YN` is `"Y"`: `"Y"` equalises the
#'   two group sizes, anything else leaves the sizes as they are.
#' @param FILE_NAME_A,FILE_NAME_B Output file names without the `.csv`
#'   extension. When `NULL`, `NA` or empty, a mode-specific default name is
#'   used.
#' @return A list with `A` and `B` (the final groups, columns `CUST_NO` and
#'   `REALMB`) and `number` (per-grade counts before and after sampling).
#' @export
ABtest_whk_fn <- function(A = A, B = B,
                          REALMB_YN = c("VVIP","VIP","GOLD","ETC"),
                          RANDOM_YN = "N",
                          SAMPLE_YN = "Y",
                          FILE_NAME_A = NULL,
                          FILE_NAME_B = NULL) {

  # The self-referential defaults `A = A` / `B = B` can never be evaluated,
  # so fail early with a readable message instead of a promise-recursion error.
  if (missing(A) || missing(B)) {
    stop("Both `A` and `B` must be supplied.", call. = FALSE)
  }
  needed_cols <- c("CUST_NO", "REALMB")
  if (!all(needed_cols %in% names(A)) || !all(needed_cols %in% names(B))) {
    stop("`A` and `B` must both contain the columns CUST_NO and REALMB.",
         call. = FALSE)
  }

  # ---- local helpers ------------------------------------------------------

  # Number of customers per grade, returned as a plain two-column data frame.
  count_by_grade <- function(df, count_col) {
    tally <- as.data.frame(dplyr::count(df, REALMB))
    names(tally) <- c("REALMB", count_col)
    tally
  }

  # Per-grade counts of two groups side by side; a grade missing from one
  # group gets a count of 0.
  count_table <- function(df_a, df_b, col_a, col_b) {
    merged <- merge(count_by_grade(df_a, col_a), count_by_grade(df_b, col_b),
                    by = "REALMB", all = TRUE)
    merged[is.na(merged)] <- 0
    merged
  }

  # Output path: the caller-supplied name plus ".csv", or the default when the
  # name is unset. `is.na(NULL)` is empty, so NULL has to be tested explicitly;
  # otherwise both groups would be written to the same file ".csv".
  output_path <- function(file_name, default_name) {
    unset <- length(file_name) == 0L || anyNA(file_name) ||
      !all(nzchar(file_name))
    if (unset) default_name else paste0(file_name, ".csv")
  }

  # Sort a group by grade in descending order.
  sort_by_grade <- function(df) {
    dplyr::arrange(df, dplyr::desc(REALMB))
  }

  # ---- clean and filter the customers --------------------------------------

  A <- unique(stats::na.omit(A))
  B <- unique(stats::na.omit(B))

  # A customer may belong to one group only; A takes precedence.
  B <- B[!B$CUST_NO %in% A$CUST_NO, ]

  A <- A[A$REALMB %in% REALMB_YN, ]
  B <- B[B$REALMB %in% REALMB_YN, ]

  # Counts per grade before any sampling.
  before <- count_table(A, B, "A_기존모수", "B_기존모수")

  # ---- sampling -------------------------------------------------------------

  if (identical(RANDOM_YN, "Y")) {

    if (identical(SAMPLE_YN, "Y")) {

      print("A/ B 모수 동등 & 리얼멤버십 고려하지 않고 모수 추출")

      # Cut the larger group down to the size of the smaller one.
      set.seed(123)
      keep_rows <- sample(max(nrow(A), nrow(B)), min(nrow(A), nrow(B)),
                          replace = FALSE)
      if (nrow(A) > nrow(B)) {
        A <- A[keep_rows, ]
      } else {
        B <- B[keep_rows, ]
      }
      default_stub <- "멤버십랜덤_모수동일.csv"

    } else {

      print("A/ B 모수 동등하지 않으며 리얼멤버십 고려하지 않고 모수 추출")
      default_stub <- "멤버십랜덤_모수동일X.csv"

    }

    testA <- sort_by_grade(A)
    testB <- sort_by_grade(B)

  } else {

    print("리얼멤버십 등급 맞춰 A/B 모수 추출")

    # Per grade, both groups receive min(count in A, count in B) customers.
    take_n <- pmin(before[["A_기존모수"]], before[["B_기존모수"]])
    take_grade <- as.character(before[["REALMB"]])
    shared_grades <- take_grade[take_n > 0]

    # Draw the per-grade sample from one group. sampling::strata() reads
    # `size` in the order in which the grades first appear in the data, so the
    # size vector is built from that order rather than from a fixed grade list.
    draw_stratified <- function(df) {
      df <- sort_by_grade(df[df$REALMB %in% shared_grades, ])
      if (nrow(df) == 0L) {
        return(df)
      }
      grade_order <- unique(as.character(df$REALMB))
      sizes <- take_n[match(grade_order, take_grade)]
      set.seed(123)
      picked <- sampling::strata(data = df, stratanames = "REALMB",
                                 size = sizes, method = "srswor")
      sampling::getdata(df, picked)
    }

    testA <- draw_stratified(A)
    testB <- draw_stratified(B)
    default_stub <- "멤버십동일_모수동일.csv"

  }

  out_a <- dplyr::select(testA, CUST_NO, REALMB)
  out_b <- dplyr::select(testB, CUST_NO, REALMB)

  # ---- save -----------------------------------------------------------------

  utils::write.csv(out_a,
                   output_path(FILE_NAME_A, paste0("A_", default_stub)),
                   row.names = FALSE, fileEncoding = "CP949")
  utils::write.csv(out_b,
                   output_path(FILE_NAME_B, paste0("B_", default_stub)),
                   row.names = FALSE, fileEncoding = "CP949")

  # ---- summary --------------------------------------------------------------

  after <- count_table(out_a, out_b, "A_수정됨", "B_수정됨")
  dcast_df <- merge(before, after, by = "REALMB", all = TRUE)
  # A grade that disappeared during sampling has a count of 0, not NA; fill it
  # here so the printed and the returned table agree.
  dcast_df[is.na(dcast_df)] <- 0
  names(dcast_df)[names(dcast_df) == "REALMB"] <- "리얼멤버십"

  result <- list(A = out_a,
                 B = out_b,
                 number = dcast_df)

  print(" ------------  리얼멤버십등급별 모수확인  -------------- ")
  print(dcast_df)
  print(" ------------  총 모수확인  -------------- ")
  total_cols <- c("A_기존모수", "B_기존모수", "A_수정됨", "B_수정됨")
  print(as.matrix(colSums(dcast_df[total_cols], na.rm = TRUE)))

  result

}
# whkim16/ABtest documentation built on Nov. 5, 2019, 12:09 p.m.