# Attach the packages used by the rest of the script.
library(knitr)
library(ggplot2)
library(tidyr)
library(dplyr)

# knitr chunk defaults: output folders for figures and cache files,
# figure dimensions, and chunk caching switched on.
knitr::opts_chunk$set(
  fig.path = 'figure/',
  cache.path = 'cache/',
  fig.width = 6,
  fig.height = 3.5,
  cache = TRUE
)
## Set wd to skaze
# `wd` keeps its trailing slash: later code builds paths with paste0(wd, ...).
# NOTE(review): this is a machine-specific absolute path and setwd() changes
# global state for the whole session; relative paths used further down are
# resolved against this directory, so it is kept as-is.
wd <- '/Volumes/Stockage/Google Drive/skaze/'
setwd(wd)
# Recursively replace NULL entries by NA.
#
# l: a list (possibly nested) or an atomic vector.
# Returns `l` with every NULL element set to NA; when `l` is a list, each
# zero-length element also becomes NA and the other elements are cleaned
# recursively.
clean.list <- function(l) {
  # vapply (not sapply) so that an empty input yields logical(0) instead of
  # list(), which is not a valid subscript.
  l[vapply(l, is.null, logical(1))] <- NA
  if (is.list(l)) {
    l <- lapply(l, function(ll) {
      if (length(ll) > 0) clean.list(ll) else NA
    })
  }
  l
}

# Lengths of the leaves of a (possibly nested) list.
#
# l: a list or an atomic vector.
# Returns length(l) for a non-list; for a list, the result of applying this
# function to every element through sapply(). Despite the name, no maximum is
# taken here.
max.length.list <- function(l) {
  if (is.list(l)) {
    tmp <- sapply(l, max.length.list)
  } else {
    tmp <- length(l)
  }
  tmp
}

# Custom as.data.frame for nested lists.
#
# list: a named (possibly nested) list.
# Returns as.data.frame(list) whose column names are replaced by the dotted
# paths computed by list_names().
list_df <- function(list) {
  df <- as.data.frame(list)
  names(df) <- list_names(list)
  df
}

# Dotted path of every leaf of a nested named list.
#
# list: a named (possibly nested) list.
# Returns the paths obtained by pasting the names met on the way down to each
# non-list element, separated by ".".
list_names <- function(list) {
  recursor <- function(list, names) {
    if (is.list(list)) {
      # Prefix the children's names with the path accumulated so far.
      new_names <- paste(names, names(list), sep = ".")
      out <- unlist(mapply(list, new_names, FUN = recursor))
    } else {
      out <- names
    }
    out
  }
  unlist(mapply(list, names(list), FUN = recursor))
}

# Turn named atomic vectors into lists, recursively.
#
# l: a list (possibly nested) or an atomic vector.
# Returns lists with list_unfold() applied to each element, named atomic
# vectors converted with as.list(), and unnamed atomic vectors unchanged.
list_unfold <- function(l) {
  if (is.list(l)) {
    out <- lapply(l, list_unfold)
  } else if (is.null(names(l))) {
    out <- l
  } else {
    out <- as.list(l)
  }
  out
}

# Read a log file holding one JSON document per line.
#
# filename: name appended verbatim to '../Data/skaze/' to build the path that
#   is read.
#   NOTE(review): an absolute path passed here ends up glued behind that
#   prefix -- confirm what callers are expected to pass.
# Returns a list with one data frame (built by list_df()) per input line.
# Progress is reported with cat().
loadLog <- function(filename) {
  cat('Loading file', filename, '\n')
  js.list <- readLines(paste0('../Data/skaze/', filename))
  nJson <- length(js.list)

  lapply(seq_along(js.list), function(i) {
    cat(' - reading', i, 'of', nJson, 'JSON input\n')

    # Parse the JSON string, put NA instead of empty entries, then turn named
    # vectors into lists.
    l <- clean.list(RJSONIO::fromJSON(js.list[i]))
    l <- list_unfold(l)

    # Useless info: vector of languages.
    l$context$navigator$languages <- NULL

    # Parse the cookie string "k1=v1; k2=v2" into a named list. Skipped when
    # the field is absent, where strsplit(...)[[1]] would fail.
    cookie <- l$server$HTTP_COOKIE
    if (length(cookie) > 0) {
      l$server$HTTP_COOKIE <- gsub(x = cookie, pattern = " ", replacement = "")
      tmp <- strsplit(
        strsplit(l$server$HTTP_COOKIE, split = ';')[[1]],
        split = "="
      )
      if (length(tmp) > 0) {
        nm <- vapply(tmp, function(v) v[1], character(1))
        # Everything after the first "=" is the value, so values that contain
        # "=" themselves are kept whole.
        l$server$HTTP_COOKIE <- lapply(tmp, function(v) {
          paste0(tail(v, -1), collapse = "=")
        })
        names(l$server$HTTP_COOKIE) <- nm
      }
    }

    # Parse the query string "?k1=v1&k2=v2" into a named list, with the same
    # guard against an absent field.
    search <- l$context$location$search
    if (length(search) > 0) {
      tmp <- strsplit(
        strsplit(
          gsub(x = search, pattern = "\\?", replacement = ""),
          split = "&"
        )[[1]],
        split = "="
      )
      if (length(tmp) > 0) {
        # Only the piece right after the first "=" is kept as the value.
        l$context$location$search <- as.list(unlist(lapply(tmp, function(kv) {
          v <- kv[2]
          names(v) <- kv[1]
          v
        })))
      }
    }

    # Flag navigation on a mobile host.
    # NOTE(review): the pattern is not anchored, so "m\\." also matches hosts
    # such as "example.com.br" -- confirm whether that is intended.
    l$context$location$mobile <-
      length(grep(l$context$location$host, pattern = "m\\.|mobile\\.")) > 0

    list_df(l)
  })
}
# List the compressed daily logs. `wd` is expected to hold the project root
# with a trailing slash. The pattern is a regular expression, so the dots are
# escaped and the extension is anchored at the end of the name.
lf <- list.files(
  path = paste0(wd, "logs"),
  pattern = "\\.log\\.bz2$",
  full.names = TRUE
)
cat(length(lf), "logs trouvés \n")

# Dates are parsed from the file names only, not from the full paths.
days <- lubridate::ymd(sub("\\.log\\.bz2$", "", basename(lf)))
cat('Logs du', as.character(min(days)), "au", as.character(max(days)), "\n")

# Tokens containing "creative" in each of the first 15 logs.
creative <- lapply(head(lf, 15), function(filename) {
  cat('Loading file', filename, '\n')
  text <- scan(file = filename, what = character())
  grep(pattern = "creative", x = text, value = TRUE)
})

# NOTE(review): `lf` holds full paths while loadLog() prepends
# '../Data/skaze/' to its argument -- confirm which location is the right one.
logdata <- lapply(lf, loadLog) %>%
  # loadLog() returns a list of data frames per file: flatten one level so
  # that the next step receives data frames, not lists of them
  unlist(recursive = FALSE) %>%
  # change all variables into character to avoid conflict of types
  lapply(function(x) {
    x[] <- lapply(x, as.character)
    x
  }) %>%
  # bind the results to get one single data.frame
  dplyr::bind_rows()

# Everything below works on `dt`, which was never assigned in this script.
# NOTE(review): presumably `dt` is meant to be the bound log table -- confirm.
dt <- logdata

# Format date entries. Characters 26-28 of the date string hold the time-zone
# label and characters 1-33 run up to the numeric UTC offset.
# NOTE(review): strptime() returns POSIXlt, which is stored as-is in the data
# frame; consider as.POSIXct() if nothing downstream relies on POSIXlt fields.
tz <- substring(dt$context.date, first = 26, last = 28)
if (length(table(tz)) == 1) {
  # A single time-zone label: parse the whole column at once.
  tz <- tz[1]
  dt$context.date <- strptime(
    substring(dt$context.date, first = 0, last = 33),
    paste0("%a %b %d %Y %H:%M:%S ", tz, "%z"),
    tz = tz
  )
} else {
  # Several labels: parse each entry with its own label.
  dt$context.date <- do.call(c, lapply(dt$context.date, function(d) {
    tz <- substring(d, first = 26, last = 28)
    strptime(
      substring(d, first = 0, last = 33),
      paste0("%a %b %d %Y %H:%M:%S ", tz, "%z"),
      tz = tz
    )
  }))
}
dt$server.date <- lubridate::ymd_hms(dt$server.date)

# Correct text formatting: URL-encoded spaces back to plain spaces.
dt$context.location.search.libelle <- gsub(
  dt$context.location.search.libelle,
  pattern = "%20", replacement = " "
)
dt$context.location.search.expression <- gsub(
  dt$context.location.search.expression,
  pattern = "%20", replacement = " "
)

# Re-arrange columns by name. drop = FALSE keeps a data frame even when a
# single column is left.
dt <- dt[, order(names(dt)), drop = FALSE]

# Delete irrelevant variables. The columns are inspected one by one with
# vapply() rather than apply(), which would first coerce the whole data frame
# to a matrix.
## only NAs
dt <- dt[
  , vapply(dt, function(col) sum(is.na(col)) < nrow(dt), logical(1)),
  drop = FALSE
]
## only one level
dt <- dt[
  , vapply(dt, function(col) length(table(col)) > 1, logical(1)),
  drop = FALSE
]
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.