library(knitr)

Download data

library(tidyverse)
library(zoo)
library(plotly)
library(cranlogs)


blogposts_dates <- list.files("../") %>% 
  stringr::str_subset("\\.md") %>% 
  stringr::str_remove_all("[^0-9-]") %>% 
  stringr::str_sub(end=-2) %>% 
  lubridate::ymd() %>% 
  as.data.frame() %>% 
  dplyr::filter(. > "2018-04-30") %>% 
  pull() %>% 
  as.character()
blogposts_names <- list.files("../") %>% 
  stringr::str_subset(paste(blogposts_dates,collapse="|")) %>% 
  stringr::str_subset("\\.md") %>% 
  stringr::str_remove_all("[0-9-]") %>% 
  stringr::str_sub(end=-4) 


events <- data.frame(date = c("2018-01-10", "2018-02-05", "2018-03-21"),
                     label = c("JOSS Review", "JOSS Publication", "Blog Creation"),
                     color = c("#2196F3", "#4CAF50", "#FF9800")) %>% 
   mutate_all(as.character) %>% 
  bind_rows(
    data.frame(date = blogposts_dates,
               label = blogposts_names,
               color = "#E91E63") %>% 
      mutate_all(as.character)
    ) %>% 
  mutate(date = lubridate::ymd(date)) 




data <- cranlogs::cran_downloads(package = "psycho", from = "2017-10-06") %>% 
  mutate(cumulative_count = cumsum(count),
         average_count = zoo::rollmax(count, 10, fill=0)+10,
         day_num = as.numeric(date) - min(as.numeric(date)),
         day = weekdays(date),
         month = months(date),
         quarters = quarters(date),
         month_day = lubridate::mday(date),
         event = ifelse(date %in% events$date, 1, 0))

events <- left_join(events, data, by="date")

Trend

library(rstanarm)
library(psycho)

fit <- rstanarm::stan_glm(count~poly(day_num, 5), data=data, algorithm="meanfield")
# fit <- glm(count~poly(day_num, 5), data=data)

psycho::get_predicted(fit, data) %>% 
  ggplot(aes(x=date, group=1)) +
  geom_hline(yintercept=0) +
  geom_line(aes(y=count), size=1) +
  geom_line(aes(y=count_Median), size=1, colour="blue") +
  geom_vline(data=events, aes(xintercept = date), colour=events$color, size=1) +
  geom_label(data=events, aes(label = label, x = date, y = average_count), fill = events$color, colour = "white", fontface="bold") +
  theme_classic() +
  scale_x_date(date_breaks = "1 month", 
               # limits = as.Date(c('2011-01-01','2013-01-01')),
               labels=scales::date_format("%Y-%m")) +
  xlab("") +
  ylab("CRAN Downloads\n")

Day

fit<- rstanarm::stan_glm(count~day, data=data)

results <- get_contrasts(fit, "day")

results$means %>% 
  mutate(Day = str_remove(Level, "day "),
         Day = factor(Day, levels=c("lundi", "mardi", "mercredi", "jeudi","vendredi", "samedi", "dimanche"))) %>% 
  ggplot(aes(x=Day, y=Median, group=1)) +
  geom_linerange(aes(ymin=CI_lower, ymax=CI_higher)) +
  geom_line()

results$contrasts

Machine Learning

# Data Partition
training <- data %>% 
  select(count, day_num, day, month, quarters, month_day, event) %>%
  modelr::resample_partition(c(train = 0.7, test = 0.3))

test <- ungroup(as.data.frame(training$test))
training <- ungroup(as.data.frame(training$train))

# Train
model <- caret::train(count ~ .,
                      data=training,
                      method = "rf",
                      trControl=caret::trainControl(method="repeatedcv",
                                                    number=10,
                                                    repeats=3))
print(model)
varImp(model, scale = TRUE)

# test
test$Predicted <- predict(model, test)
test %>%
  select(pred=Predicted, obs=count) %>%
  as.data.frame() %>%
  defaultSummary()

cbind(data, data.frame(Predicted = predict(model, data))) %>% 
  ggplot(aes(x=count, y=Predicted)) +
  geom_point() +
  geom_smooth()



# Visualize
newdata <- psycho::emotion %>%
  select(-Participant_ID) %>%
  psycho::refdata(target="Subjective_Valence", length.out=100)

Event List

Credits

This package helped you? Don't forget to cite the various packages you used :)

You can cite psycho as follows:

Previous Blogposts



neuropsychology/psycho.R documentation built on Jan. 25, 2021, 7:59 a.m.