R/elle_store.R

# Build the summary tables for an ELLE store page-view log.
#
# Arguments:
#   view         data frame of page views. The code reads the columns `page`
#                (URL string), `scid` (each distinct value is counted as one
#                user), `ts` (sort key used to find a user's first hit) and
#                `referer`. Columns named `null` and `count` are dropped.
#   top_brand    maximum number of rows kept in the brand table.
#   top_product  maximum number of rows kept in the product table.
#   top_source   maximum number of rows kept in the traffic-source table.
#
# Returns a named list with the elements
#   unique_user     number of distinct `scid` values,
#   page_type       count2() table of the first path segment,
#   top_brand       count2() table of brand slugs under "marques",
#   top_product     count2() table of second path segments under "produit",
#   traffic_source  count2() table of each user's first referer,
#   funnel          data frame (rows: checkout steps; columns: view, user).
#
# `count2` and `referer` are helpers defined elsewhere in the package.
# NOTE(review): the top-N tables rely on count2() returning rows already
# sorted by descending count -- confirm against its definition.
elle_store = function(view,top_brand=10, top_product=20, top_source = 5){

  stopifnot(is.data.frame(view))
  absent = setdiff(c("page", "scid", "ts", "referer"), names(view))
  if (length(absent) > 0) {
    stop("`view` is missing required column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # Pick the idx-th "/"-separated piece of every URL. A URL that is too short
  # yields "" and an NA URL yields NA, instead of the "subscript out of
  # bounds" error that a bare `[[` raises.
  url_segment = function(pieces, idx) {
    vapply(pieces, function(p) {
      if (length(p) == 1L && is.na(p)) {
        NA_character_
      } else if (length(p) >= idx) {
        p[[idx]]
      } else {
        ""
      }
    }, character(1), USE.NAMES = FALSE)
  }

  # remove useless columns
  view$null = NULL
  view$count = NULL

  # "scheme://host/loc1/loc2/..." splits into "scheme:", "", "host", loc1, loc2
  pieces = strsplit(as.character(view$page), "/")
  view$loc1 = url_segment(pieces, 4L)
  view$loc2 = url_segment(pieces, 5L)

  # unique users
  unique_user = length(unique(view$scid))

  # pageview by page type
  page_type = count2(view,loc1)

  # TOP brands: keep the leading slug of the second segment, lower-cased.
  # (Inside the character class the `|` is a literal character.)
  view_marques = filter(view,loc1 == 'marques'&loc2!="")
  view_marques$loc2t = tolower(str_extract(view_marques$loc2,"[\\w|-]+"))
  # head() returns at most `top_brand` rows and never pads with NA rows
  brand = head(count2(view_marques,loc2t), top_brand)

  # TOP products
  view_product = filter(view,loc1 == 'produit')
  product = head(count2(view_product,loc2), top_product)

  # traffic source: the referer of each user's earliest hit.
  # Inside summarise() `referer` is the column; afterwards `referer()` is the
  # package helper applied to the collected values.
  view1 = view %>%
    group_by(scid) %>%
    arrange(ts) %>%
    summarise(rfr = first(referer))
  view1$rfr = referer(view1$rfr)
  traffic_source = head(count2(view1,rfr), top_source)

  # funnel model: page views and distinct users on each checkout step.
  # %in% never yields NA, so rows with a missing loc2 are simply not counted.
  steps = c("livraison","paiement","confirmation","confirmee")
  on_step = lapply(steps, function(step) view$loc2 %in% step)
  funnel = data.frame(
    view = vapply(on_step, sum, integer(1)),
    user = vapply(on_step,
                  function(hit) length(unique(view$scid[hit])),
                  integer(1)),
    row.names = steps
  )

  list(
    unique_user = unique_user,
    page_type = page_type,
    top_brand = brand,
    top_product = product,
    traffic_source = traffic_source,
    funnel = funnel
  )
}
Meowllo/shopcade documentation built on May 12, 2019, 4:25 p.m.