Pull in IP addresses, filter to only keep IP addresses over 11 views (I think any 11 or below is a bot, since they are so common)
eyeIntegration.stats
is generated as follows:
Daily_eyeinegration_frequent_IP_access_list
emails in Outlook
cat *eml | grep "^[[:digit:]]" > eyeIntegration.stats
library(tidyverse)
library(rworldmap)
library(ipstack)
library(lubridate)
library(patchwork)

# Every exported daily-report email (.eml) found under the export directory.
eml_files <- list.files('~/data/eyeIntegration_eml_emails/x/',
                        full.names = TRUE,
                        pattern = '*eml',
                        recursive = TRUE)

# Parse one daily report email into a tibble of per-IP page views.
#
# eml: path to a single .eml file.
#
# Returns a tibble holding the rows whose first space-delimited field starts
# with a digit (the IP rows), with columns `ip`, `Page Views` (character:
# a whole number or NA), any further columns read_delim() produced, and
# `date` (the report date taken from the email's Date line).
eml_reader <- function(eml) {
  raw <- read_lines(eml)
  tsv <- read_delim(eml, col_names = c('ip', 'Page Views'), delim = ' ')
  tsv <- tsv %>%
    filter(grepl('^\\d', ip)) %>%
    mutate(
      # Strip the HTML line break BEFORE the integer coercion; doing it
      # afterwards (as before) turned values such as "15<br>" into NA.
      `Page Views` = trimws(gsub('<br>', '', `Page Views`, fixed = TRUE)),
      # Non-numeric leftovers become NA. The column stays character, as it
      # was previously, so it still combines with the X3..X6 columns below.
      `Page Views` = as.character(as.integer(`Page Views`))
    )
  # Only the first line mentioning 'Date' is used: with several matches the
  # split result is a multi-row matrix and the positional indexing below
  # would mix pieces of different lines.
  date_line <- grep('Date', raw, value = TRUE)[1]
  # Characters 12-22 presumably hold "day month year" of a
  # "Date: Www, d Mon yyyy" header -- TODO confirm against the emails.
  chunk <- str_split(substr(date_line, 12, 22), ' ', simplify = TRUE)
  tsv$date <- lubridate::mdy(paste(chunk[2], chunk[1], chunk[3]))
  tsv
}

# One parsed tibble per email, named by file path.
eml_list <- eml_files %>%
  set_names() %>%
  map(eml_reader)

# Combine all reports. The view count is taken from the first non-missing of
# `Page Views`, X3, X4, X5, X6 (X3..X6 are presumably overflow columns created
# when a row has extra spaces -- this step assumes they exist), then only IPs
# with more than 11 views are kept.
ipaddresses <- eml_list %>%
  bind_rows() %>%
  unique() %>%
  mutate(`Page Views` = case_when(!is.na(`Page Views`) ~ `Page Views`,
                                  !is.na(X3) ~ X3,
                                  !is.na(X4) ~ X4,
                                  !is.na(X5) ~ X5,
                                  !is.na(X6) ~ X6),
         `Page Views` = as.numeric(`Page Views`)) %>%
  filter(`Page Views` > 11)

# find missing dates
first_seen <- ipaddresses$date %>% sort() %>% head(1)
date_span <- Sys.Date() - first_seen
# One row per calendar day from the first observed report date to today.
date_tib <- tibble(date = first_seen + days(0:as.integer(date_span)))

# if loading previous work
#load('visitor_stats_2023_04.Rdata')
#save(ipaddresses, eml_list, file = 'visitor_stats_2023_04.Rdata')
Total page views from May 2017 to present
# Grand total of page views across every retained IP/date row.
ipaddresses %>%
  pull(`Page Views`) %>%
  sum() #1718117
Chart of cumulative visitor counts (unique IP addresses per day) increasing over time
# Cumulative visitor curve: count the distinct IPs seen on each report date,
# then accumulate those daily counts over time.
a <- ipaddresses %>%
  filter(`Page Views` > 11) %>%
  group_by(date) %>%
  summarise(`Visitor Count` = n_distinct(ip)) %>%
  mutate(`Visitor Count` = cumsum(`Visitor Count`)) %>%
  ggplot(aes(x = date, y = `Visitor Count`)) +
  geom_line() +
  cowplot::theme_cowplot() +
  labs(x = 'Date', y = 'eyeIntegration\nCumulative\nVisits') +
  # Force the x axis to cover the full reporting window.
  expand_limits(x = as.Date(c("2017-05-01", "2023-05-01")))
a
Get latitude and longitude for all unique IPs (takes a few minutes to query)
unique_IP <- ipaddresses$ip %>% unique()

# The ipstack credential is read from the environment instead of being
# committed with the analysis.
# NOTE(review): the key that used to be hard-coded here has been published and
# should be treated as leaked -- rotate it.
ipstack_key <- Sys.getenv('IPSTACK_API_KEY')
if (!nzchar(ipstack_key)) {
  stop('Set the IPSTACK_API_KEY environment variable before querying ipstack.',
       call. = FALSE)
}

ip_info <- list()
# ipstack
# One lookup per IP; an IP that already has an entry in ip_info is skipped.
for (i in unique_IP) {
  if (!(i %in% names(ip_info))) {
    ip_info[[i]] <- ip_lookup(i, ipstack_api_key = ipstack_key, hostname = TRUE)
  }
}
# ip_info_ipapi <- list()
# for (i in unique_IP){
#   if (!(i %in% names(ip_info_ipapi))){
#     print(i)
#     ip_info_ipapi[[i]] <- ipapi::geolocate(i, .progress = FALSE)
#     Sys.sleep(2)
#   }
# }

# Flatten each lookup result into a one-row tibble. cbind() is used (rather
# than tibble()) so that a field missing from a response is simply left out of
# that row and later filled with NA by bind_rows(). All values, including
# lat/lon, end up as character.
processed_ip <- lapply(ip_info, function(rec) {
  ip <- rec$ip
  isp <- rec$connection$isp %>% as.character()
  lat <- rec$latitude %>% as.character()
  lon <- rec$longitude %>% as.character()
  city <- rec$city %>% as.character()
  country <- rec$country_name
  cbind(ip, isp, lat, lon, country, city) %>% as_tibble()
})
ip_tibble <- processed_ip %>% bind_rows()

# Preview the IPs whose ISP name looks like a research or medical organisation.
ip_tibble %>%
  filter(grepl('Insti|Univ|Hosp|Med|Scie|Tech|Coll|Health|Verein|State|Labora|State|Riken|Biome|Agenc|China Educa|Merck', isp))
# ISP-name fragments treated as research / medical / pharma organisations, and
# two commercial providers that match the pattern but are excluded.
research_isp_pattern <- 'Insti|Univ|Hosp|Med|Scie|Tech|Coll|Health|Verein|State|Labora|State|Riken|Biome|Agenc|China Educa|Merck'
excluded_isps <- c('Nus Information Technology', 'Virgin Media Limited')

# Page views and number of IP/date rows per ISP, most-used first, restricted
# to the research-like ISPs.
research_isp_usage <- ipaddresses %>%
  left_join(ip_tibble) %>%
  group_by(isp) %>%
  summarise(Usage = sum(`Page Views`), Visits = n()) %>%
  arrange(desc(Usage)) %>%
  filter(grepl(research_isp_pattern, isp), !isp %in% excluded_isps)

# Same table with the German research network given a readable display name.
usage_by_academia_pharma <- research_isp_usage %>%
  mutate(isp = if_else(grepl("Verein", isp),
                       'German National\nResearch and Education Network',
                       isp)) %>%
  data.frame()

# Top 15 uses the display names; top 25 uses the raw ISP names.
top15 <- head(usage_by_academia_pharma$isp, 15)
top25 <- research_isp_usage %>%
  head(25) %>%
  pull(isp)
# Cumulative page views over time for each of the top-25 research ISPs,
# one facet per ISP.
ipaddresses %>%
  # Attach the ISP of each IP (one row per ip/isp pair).
  left_join(count(ip_tibble, ip, isp, name = 'Count')) %>%
  filter(`Page Views` > 11, isp %in% top25) %>%
  group_by(isp, date) %>%
  summarise(Usage = sum(`Page Views`)) %>%
  # Still grouped by isp here, so the running total restarts for every ISP.
  mutate(Usage = cumsum(Usage)) %>%
  mutate(isp = case_when(
    grepl("Verein", isp) ~ 'German National\nResearch and Education Network',
    TRUE ~ isp
  )) %>%
  ggplot(aes(x = date, y = Usage)) +
  geom_line() +
  cowplot::theme_cowplot() +
  labs(x = 'Date', y = 'eyeIntegration\nCumulative\nVisits') +
  expand_limits(x = as.Date(c("2017-05-01", "2023-05-01"))) +
  facet_wrap(~isp, scales = 'free', labeller = label_wrap_gen(20))
Save
# Persist the geolocation table, the page-view table and the raw lookup
# results so the (slow) IP queries do not have to be repeated.
save(list = c('ip_tibble', 'ipaddresses', 'ip_info'),
     file = '~/git/eyeIntegration_app/visitor_stats_2023_04.Rdata')
Interactive Map
# Disabled interactive (ggiraph) version of the usage map, kept for reference.
# NOTE(review): it refers to `geo_locate` and `worldmap`, neither of which is
# defined in the visible code -- confirm where they come from before
# re-enabling.
# library(ggiraph)
# world <- map_data("world")
# world <- world[world$region != "Antarctica",]
# #world <- c(geom_polygon(aes(long,lat,group=group), size = 0.1, colour= "#090D2A", fill="white", alpha=0.8, data=worldmap))
# code <- ggplot() + geom_map(data=world, map=world,
#                   aes(x=long, y=lat, map_id=region),
#                   color="#090D2A", fill="white", size=0.05, alpha=1/4) +
#   geom_point_interactive(aes(x = geo_locate$longitudes, y = geo_locate$latitudes, tooltip=geo_locate$cities)) + theme_void()
# ggiraph(code = print(code) )
Regular Map
# Mean coordinates of each top-15 research ISP, used to place map labels.
# `top15` is pulled after ISP names containing "Verein" were replaced by the
# display label below, so the same replacement is applied here first;
# otherwise that institution never matches `top15` and loses its label.
top15_coords <- ip_tibble %>%
  mutate(isp = case_when(
    grepl("Verein", isp) ~ 'German National\nResearch and Education Network',
    TRUE ~ isp
  )) %>%
  filter(isp %in% top15) %>%
  group_by(isp) %>%
  summarise(lon = mean(as.numeric(lon)), lat = mean(as.numeric(lat)))

# World outline without Antarctica.
world <- map_data("world")
world <- world[world$region != "Antarctica",]

# All geolocated IPs in black, research-like ISPs overplotted in red.
# lat/lon are stored as character in ip_tibble, hence the as.numeric() calls.
# NOTE(review): the counts in the title are hard-coded, not computed.
ggplot(data = ip_tibble, aes(x = as.numeric(lon), y = as.numeric(lat))) +
  geom_point(color = 'black', size = 0.4) +
  geom_point(data = ip_tibble %>%
               filter(grepl('Insti|Univ|Hosp|Med|Scie|Tech|Coll|Health|Verein|State|Labora|State|Riken|Biome|Agenc|China Educa|Merck', isp)),
             color = 'Red', size = 0.5) +
  theme_void() +
  ggsci::scale_color_aaas() +
  geom_map(data = world %>% mutate(Institution = 'placeholder'),
           map = world,
           aes(x = long, y = lat, map_id = region),
           color = "#090D2A", fill = "lightgray", size = 0.05, alpha = 1/4) +
  ggtitle('eyeIntegration has been used across 84 countries,\n250 research institutions, and 1465 cities')
# World map of every geolocated IP, with repelled text labels placed at the
# mean coordinates of the top-15 research ISPs.
ggplot(data = ip_tibble,
       aes(x = as.numeric(lon), y = as.numeric(lat))) +
  geom_point(color = 'black', size = 0.4) +
  theme_void() +
  scale_color_viridis_d() +
  geom_map(data = world,
           map = world,
           aes(x = long, y = lat, map_id = region),
           color = "#090D2A",
           fill = "lightgray",
           size = 0.05,
           alpha = 1/4) +
  ggrepel::geom_label_repel(data = top15_coords,
                            aes(x = lon, y = lat, label = isp),
                            max.overlaps = Inf,
                            force = 15,
                            force_pull = 0.2) +
  labs(title = "Top 15 Research Institution Users of eyeIntegration")
# Total page views and mean coordinates per ISP.
# The join key is `ip`: ip_tibble as built in this analysis has no `query`
# column. Its lat/lon are character, so they are converted before averaging.
isp_location <- ipaddresses %>%
  left_join(ip_tibble, by = 'ip') %>%
  group_by(isp) %>%
  summarise(Usage = sum(`Page Views`),
            lat = mean(as.numeric(lat)),
            lon = mean(as.numeric(lon)))

# ISPs whose name looks like a research / academic / public organisation,
# most-used first.
isp_location_academic <- isp_location %>%
  filter(grepl('Agency|Acad|Scie|Centr|Center|Schoo|Univ|Hosp|Stat|Colle|Resea|Publ|Nation|Instit|Medici', isp),
         !grepl('BT Public', isp)) %>%
  arrange(-Usage)

# Horizontal bar chart of the 25 heaviest research users. The y axis is broken
# between 35,000 and 240,000 (hard-coded values).
# NOTE(review): the plot objects keep their original names (`c`, `b`, `first`)
# in case later code refers to them, even though `c` shadows base::c as a
# variable.
c <- isp_location_academic %>%
  head(25) %>%
  mutate(isp = factor(isp, levels = rev(isp_location_academic$isp))) %>%
  ggplot(aes(x = isp, y = Usage)) +
  geom_bar(stat = 'identity', fill = 'darkblue') +
  ggbreak::scale_y_break(c(35000, 240000)) +
  scale_y_continuous(breaks = c(5000, 15000, 25000, 240000)) +
  coord_flip() +
  cowplot::theme_cowplot() +
  xlab('Top 25\nResearch Users') +
  ylab('Page Views') +
  theme(text = element_text(size = 20))

# World map: every ISP in black, research-like ISPs overplotted in dark blue.
b <- ggplot(data = isp_location, aes(x = lon, y = lat)) +
  geom_map(data = world, map = world,
           aes(x = long, y = lat, map_id = region),
           color = "#090D2A", fill = "lightgray", size = 0.05, alpha = 1/4) +
  geom_point(color = 'black', size = 4, alpha = 0.5) +
  geom_point(data = isp_location_academic,
             color = 'darkblue', size = 5, alpha = 0.7) +
  theme_void() +
  scale_color_viridis_c()

# Composite figure: cumulative-visits curve (`a`) beside the bar chart on the
# top row, the map underneath.
first <- (a + c) +
  plot_layout(widths = c(1, 2)) +
  plot_annotation(theme = theme(text = element_text(size = 25)))

first / b +
  plot_layout(heights = c(1, 1.4)) +
  plot_annotation(
    title = 'eyeIntegration Worldwide Usage',
    subtitle = glue::glue('Users from {ip_tibble$country %>% unique() %>% length()} countries, {ip_tibble$city %>% unique() %>% length()} cities, and {isp_location_academic %>% nrow} research institutions.'),
    caption = 'Blue points are research institutions',
    theme = theme(text = element_text(size = 25))
  )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.