library(dplyr)
library(synapser)
library(RMySQL)
library(yaml)
library(lubridate)
library(synapseusagereports)

# Log in to Synapse. capture.output() collects whatever synLogin() prints to
# standard output so that it does not end up in the rendered report.
foo <- capture.output(synLogin())

# Make HTML the default output type for xtable.
options(xtable.type = "html")

# Chunk defaults for the whole report: hide source code, warnings and
# messages, do not tidy code, and use wide figures.
knitr::opts_chunk$set(
  echo = FALSE,
  warning = FALSE,
  message = FALSE,
  error = FALSE,
  tidy = FALSE,
  fig.width = 20
)
# Project ID with the "syn" text removed (e.g. "syn123" -> "123").
int_project_id <- gsub("syn", "", params$projectId)

# Project entity; its properties (e.g. the name) are used in the report text.
proj <- synGet(params$projectId)

# Team grouping is enabled only when the `teamOrder` parameter holds a
# non-empty value. Scalar, short-circuiting `&&` is used so that a missing
# (NULL) parameter simply disables grouping instead of making `if` fail on a
# zero-length condition, and so that the test always yields a single
# TRUE/FALSE.
useTeamGrouping <- length(params$teamOrder) > 0 &&
  !is.na(params$teamOrder[[1]]) &&
  params$teamOrder[[1]] != ""

if (useTeamGrouping) {
  # `teamOrder` is a comma-separated string; split it into a character vector
  # with one element per team, in the order given.
  teamOrder <- stringr::str_split(params$teamOrder[[1]],
                                  pattern = ",",
                                  simplify = FALSE)[[1]]
}


# Read the query records from the CSV file named in the report parameters and
# store userId as character rather than whatever type read_csv() guessed.
queryData <- mutate(
  readr::read_csv(params$queryDataFile),
  userId = as.character(userId)
)

Activity on Synapse project `r proj$properties$name` (`r params$projectId`) from `r min(queryData$date)` to `r max(queryData$date)`.

# Team member list is only built when team grouping is enabled; otherwise
# NULL is passed on.
userList <- if (useTeamGrouping) processTeamMemberList(teamOrder) else NULL

# Get user profile info for users in records, attach it to every record, and
# keep only the records that ended up with a team name.
allUsers <- getQueryUserProfiles(queryData, useTeamGrouping, userList)
queryData <- dplyr::filter(
  left_join(queryData, allUsers),
  !is.na(teamName)
)

Active new and unique registered Synapse users per month, starting from first month in the range:

# Per-month user counts from the two package helpers, each tagged with the
# row label it gets in the table.
newUsers <- mutate(firstMonthToVisit(queryData), source = "New")
uniqueUsers <- mutate(uniqueUsersPerMonth(queryData), source = "Unique")

# One row per source ("New" / "Unique"), one column per Date value.
knitr::kable(
  reshape2::dcast(
    rbind(newUsers, uniqueUsers),
    source ~ Date,
    value.var = "Users"
  )
)

Active registered Synapse users per team (per month):

# Count distinct users per team and date grouping, leaving out the
# "Anonymous" team, then spread the date groupings into columns.
queryData %>%
  filter(teamName != "Anonymous") %>%
  distinct(dateGrouping, teamName, userName) %>%
  count(dateGrouping, teamName) %>%
  reshape2::dcast(teamName ~ dateGrouping, value.var = "n") %>%
  knitr::kable()

There are `r length(setdiff(unique(queryData$userName), c("anonymous")))` active registered Synapse users in this time period. Of these, `r multiMonthVisits(queryData) %>% nrow` users were active in the project in at least two different months.

Entity downloads

The top 50 Files or Folders with at least 5 downloads.

### Data
# All download records. Both record types count as a download, and every
# count below (threshold, per-month columns, total) is taken from this same
# set so that the numbers in the table agree with each other.
downloadRecords <- queryData %>%
  dplyr::filter(recordType %in% c('download', 'filedownloadrecord'))

# Entities with at least 5 downloads over the whole period.
tmp <- downloadRecords %>%
  dplyr::count(id, NAME, NODE_TYPE) %>%
  dplyr::filter(n >= 5) %>%
  # I don't recall why this is here
  dplyr::filter(!stringr::str_detect(id, "acl"))

if (nrow(tmp) > 0) {
  # Downloads per entity and date grouping, one column per date grouping,
  # plus an HTML link to the entity on the Synapse web site.
  dataaccessCount1 <- downloadRecords %>%
    dplyr::filter(id %in% tmp$id) %>%
    dplyr::count(id, NAME, NODE_TYPE, dateGrouping) %>%
    reshape2::dcast(id + NAME + NODE_TYPE ~ dateGrouping, value.var = 'n') %>%
    dplyr::mutate(name = sprintf("<a href='https://www.synapse.org/#!Synapse:syn%s' target='_blank'>%s</a>", id, NAME))
  # dplyr::mutate(name=sprintf("[%s](https://www.synapse.org/#!Synapse:syn%s)", NAME, id))

  # Total downloads per entity over the whole period. This previously counted
  # only 'download' records, so totals were too low (or NA) for entities whose
  # downloads were logged as 'filedownloadrecord'.
  dataaccessCount2 <- downloadRecords %>%
    dplyr::filter(id %in% tmp$id) %>%
    dplyr::count(id, NAME, NODE_TYPE) %>%
    dplyr::arrange(dplyr::desc(n))

  # Top 50 entities by total downloads: link first, then the remaining
  # columns, with the raw id and NAME columns dropped.
  dataaccessCount <- dataaccessCount1 %>%
    left_join(dataaccessCount2, by = c("id", "NAME", "NODE_TYPE")) %>%
    dplyr::arrange(dplyr::desc(n)) %>%
    head(50) %>%
    dplyr::rename(total = n) %>%
    dplyr::select(name, everything(), total, -id, -NAME)

  # NOTE(review): per the DT docs, numeric `escape` indices count the row-name
  # column first, so `escape = 1` should leave the HTML in `name` unescaped
  # and rendered as a link - confirm against the installed DT version.
  DT::datatable(dataaccessCount,
                options = list(pageLength = 20),
                escape = 1)
}

Report Template Version: `r packageVersion("synapseusagereports")`



Sage-Bionetworks/synapseusagereports documentation built on May 9, 2020, 12:54 a.m.