inst/doc/introduction-to-smoothy.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

options(rmarkdown.html_vignette.check_title = FALSE)

## ----echo=TRUE,eval=F---------------------------------------------------------
#  install.packages('smoothy')

## ----setup, warning=FALSE,echo=T----------------------------------------------
library(smoothy)
library(tidyr)
library(knitr)

## ----data---------------------------------------------------------------------
data(drugstreatment)


## ----data_tab, echo=FALSE, results='asis'-------------------------------------
kable(head(drugstreatment),format='markdown')


## ----single-------------------------------------------------------------------
my_data <- dplyr::filter(drugstreatment, id == "62eb1ebd-1b49-4ba7-9af9-ec0c4ada0956")

## ----parse--------------------------------------------------------------------
structured_df <- smooth_parse(
  id = my_data$id,
  start_date = my_data$start_date,
  end_date = my_data$end_date,
  drug = my_data$drug,
  study_from = "1970-01-01",
  study_to = "1975-01-01"
)

## ----parse_tab, echo=FALSE, results='asis'------------------------------------
kable(head(structured_df),format='markdown')


## ----smooth-------------------------------------------------------------------
smoothed <- smooth_algorithm(
  id = structured_df$id, 
  treatment = structured_df$treatment, 
  day = structured_df$day, 
  N = structured_df$N, 
  width = 61
)

## ----smooth_tab, echo=FALSE, results='asis'-----------------------------------
kable(head(smoothed),format='markdown')


## ----deparse------------------------------------------------------------------
deparsed.sm <- smooth_deparse(smoothed$id, smoothed$day, smoothed$smoothed_treatment)

## ----deparsed_tab, echo=FALSE, results='asis'---------------------------------
knitr::kable(deparsed.sm,format='markdown')


## ----deparse0-----------------------------------------------------------------
deparsed <- smooth_deparse(smoothed$id, smoothed$day, smoothed$treatment)

## ----count diff, eval=FALSE---------------------------------------------------
#  smooth_diff(treatment = smoothed$treatment, smoothed_treatment = smoothed$smoothed_treatment)

## ----count diff_tab, echo=FALSE, results='asis'-------------------------------
kable(smooth_diff(treatment = smoothed$treatment, smoothed_treatment = smoothed$smoothed_treatment),format='markdown')

## ----viz, fig.show='hold',eval=T,echo=T,fig.align='center',fig.height=4,fig.width=8----
require(ggplot2)
require(gridExtra)

tts = unique(deparsed$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p = ggplot(deparsed, aes(x = start_date, y = treatment)) + geom_segment(aes(x = start_date, 
        xend = end_date, y = treatment, yend = treatment), size = 2, alpha = 0.85, 
        col = 'grey20') + theme_bw() + scale_x_date(date_breaks = '6 months') + 
        scale_y_discrete(limits = yorder) + theme(axis.text.x = element_text(angle = 60, 
        hjust = 1), legend.position = "none") + ylab("") + xlab("") + 
        ggtitle(paste0('Original treatment'))

tts = unique(deparsed.sm$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p.sm = ggplot(deparsed.sm, aes(x = start_date, y = treatment)) + geom_segment(aes(x = start_date, 
        xend = end_date, y = treatment, yend = treatment), size = 2, alpha = 0.85, 
        col = 'grey20') + theme_bw() + scale_x_date(date_breaks = '6 months') + 
        scale_y_discrete(limits = yorder) + theme(axis.text.x = element_text(angle = 60, 
        hjust = 1), legend.position = "none") + ylab("") + xlab("") + 
        ggtitle(paste0('Smoothed treatment'))

grid.arrange(p,p.sm,ncol=1)
    

## ----window_size--------------------------------------------------------------
my_data <- dplyr::filter(drugstreatment, id == '25094328-3819-4061-941d-191c4e0bc939')

structured_df <- smooth_parse(
  id = my_data$id,
  start_date = my_data$start_date,
  end_date = my_data$end_date,
  drug = my_data$drug,
  study_from = "1970-01-01",
  study_to = "1975-01-01"
)

# smooth
smoothed <- smooth_algorithm(id = structured_df$id, 
                             treatment = structured_df$treatment, 
                             day = structured_df$day, 
                             N = structured_df$N, 
                             width = 31)
smoothed45 <- smooth_algorithm(id = structured_df$id, 
                             treatment = structured_df$treatment, 
                             day = structured_df$day, 
                             N = structured_df$N, 
                             width = 45)
smoothed61 <- smooth_algorithm(id = structured_df$id, 
                             treatment = structured_df$treatment, 
                             day = structured_df$day, 
                             N = structured_df$N, 
                             width = 61)


deparsed <- smooth_deparse(smoothed$id, smoothed$day, smoothed$treatment)
deparsed.sm <- smooth_deparse(smoothed$id, smoothed$day, smoothed$smoothed_treatment)
deparsed.sm45 <- smooth_deparse(smoothed45$id, smoothed45$day, smoothed45$smoothed_treatment)
deparsed.sm61 <- smooth_deparse(smoothed61$id, smoothed61$day, smoothed61$smoothed_treatment)


## ----viz_window, fig.show='hold',eval=T,echo=F,fig.align='center',fig.height=8,fig.width=8----
## plot:
tts = unique(deparsed$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p = ggplot(deparsed, aes(x = start_date, y = treatment)) + 
  geom_segment(aes(x = start_date, 
                   xend = end_date, y = treatment, yend = treatment), linewidth = 2, alpha = 0.85, 
               col = 'grey20') + theme_bw() + scale_x_date(date_breaks = '6 months') + 
  scale_y_discrete(limits = yorder) + 
  theme(axis.text.x = element_text(angle = 60, hjust = 1), legend.position = "none") + 
  ylab("") + xlab("") + 
  ggtitle(paste0('Original raw data treatment'))
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.

tts = unique(deparsed.sm$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p.sm = ggplot(deparsed.sm, aes(x = start_date, y = treatment)) + 
  geom_segment(aes(x = start_date, 
                   xend = end_date, y = treatment, yend = treatment), linewidth = 2, alpha = 0.85, col = 'grey20') + 
  theme_bw() + scale_x_date(date_breaks = '6 months') + 
  scale_y_discrete(limits = yorder) + 
  theme(axis.text.x = element_text(angle = 60, hjust = 1), legend.position = "none") + 
  ylab("") + xlab("") + 
  ggtitle(paste0('Smoothed treatment (31 days window)'))



tts = unique(deparsed.sm45$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p.sm45 = ggplot(deparsed.sm45, aes(x = start_date, y = treatment)) + 
  geom_segment(aes(x = start_date, 
                   xend = end_date, y = treatment, yend = treatment), linewidth = 2, alpha = 0.85, col = 'grey20') + 
  theme_bw() + scale_x_date(date_breaks = '6 months') + 
  scale_y_discrete(limits = yorder) + 
  theme(axis.text.x = element_text(angle = 60, hjust = 1), legend.position = "none") + 
  ylab("") + xlab("") + 
  ggtitle(paste0('Smoothed treatment (45 days window)'))



tts = unique(deparsed.sm61$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p.sm61 = ggplot(deparsed.sm61, aes(x = start_date, y = treatment)) + 
  geom_segment(aes(x = start_date, 
                   xend = end_date, y = treatment, yend = treatment), linewidth = 2, alpha = 0.85, col = 'grey20') + 
  theme_bw() + scale_x_date(date_breaks = '6 months') + 
  scale_y_discrete(limits = yorder) + 
  theme(axis.text.x = element_text(angle = 60, hjust = 1), legend.position = "none") + 
  ylab("") + xlab("") + 
  ggtitle(paste0('Smoothed treatment (61 days window)'))




gridExtra::grid.arrange(p,p.sm,p.sm45,p.sm61,ncol=1)

## ----echo=TRUE,eval=FALSE-----------------------------------------------------
#  library(doParallel)
#  library(foreach)
#  library(snow)
#  library(doSNOW)

## ----parallel1, echo=TRUE,eval=FALSE------------------------------------------
#  data = drugstreatment
#  
#  # starting date:
#  s0 = Sys.time()
#  
#  # create chunks:
#  chunksize = 50
#  niter <- n_distinct(drugs$id)
#  chunks <- ceiling(niter/chunksize)
#  inds <- split(seq_len(niter), sort(rep_len(seq_len(chunks),
#                                             niter)))

## ----parallel2, echo=TRUE,eval=FALSE------------------------------------------
#  
#  # parallel - cores and socket:
#  n.cores <- 3
#  cl <- snow::makeSOCKcluster(n.cores)
#  doSNOW::registerDoSNOW(cl)
#  
#  # progress bar:
#  pb <- utils::txtProgressBar(min = 1, max = chunks, style = 3)
#  progress <- function(n) utils::setTxtProgressBar(pb,n)
#  opts <- list(progress = progress)
#  

## ----parallel3, echo=TRUE,eval=FALSE------------------------------------------
#  
#  # path to temporal directory:
#  tmp.path = tempdir()
#  diff=FALSE
#  
#  l <- foreach(c = 1:chunks,
#               .packages = c("Kendall", "smoothy", "data.table", "anytime", "dplyr"),
#               .options.snow = opts,
#               .multicombine = F) %dopar% {
#  
#                 chunk.id <- unique(data$id)[inds[[c]]]
#  
#                 # run the workflow in each individual from the chunk:
#                 df <- filter(data, id %in% chunk.id)
#  
#                 # 1) parse data:
#                 structured_df <- smooth_parse(
#                   id = df$id,
#                   start_date = df$start_date,
#                   end_date = df$end_date,
#                   drug = df$drug,
#                   study_from = "1970-01-01",
#                   study_to = "1975-01-01"
#                 )
#  
#                 # 2) smooth algorithm:
#                 width <- 61
#  
#                 smoothed <- smooth_algorithm(
#                   id = structured_df$id,
#                   treatment = structured_df$treatment,
#                   day = structured_df$day,
#                   N =  structured_df$N,
#                   width = width
#                   )
#  
#                 # 3) deparse data (original format):
#                 deparsed_smoothed <- smooth_deparse(
#                   smoothed$id,
#                   smoothed$day,
#                   smoothed$smoothed_treatment
#                   )
#  
#  
#                 # 4) Per patient changes due to smooth algorithm:
#                 if(diff){
#  
#                   # Calculate differences by patient mapping with the group_map function:
#                   df <- smoothed %>%
#                     group_by(id) %>%
#                     group_map(~ smooth_diff(.$treatment,.$smoothed_treatment)) %>%
#                     bind_rows(.id = "group_id") %>%
#                     data.frame
#  
#                   # Format output and filter global, exposure period:
#                   df <- df %>%
#                     mutate(percentage_of_change = round(proportion_of_change*100,2)) %>%
#                     filter(type%in%c('Global','Exposure period')) %>%
#                     mutate(type = factor(type,levels=c('Global','Exposure period'),
#                                          labels=c('total_change','exposure_change')))
#                   # add 'id' and reshape:
#                   df <- df %>%
#                     left_join(data.frame(id=unique(smoothed$id),group_id = as.character(seq(1,n_distinct(smoothed$id))))) %>%
#                     reshape2::dcast(id~type,value.var='percentage_of_change')
#  
#                   # attach to deparsed_smoothed dataframe:
#                   deparsed_smoothed <- left_join(
#                     deparsed_smoothed,
#                     df
#                   )
#  
#                   rm(df)
#                 }
#  
#                 # Save chunk output to a temporary folder:
#                 saveRDS(deparsed_smoothed,paste0(tmp.path,"/chunk_",c,".rds"))
#  
#                 rm(df,structured_df,smoothed,deparsed_smoothed);gc()
#  
#               }
#  
#  

## ----parallel31, echo=TRUE,eval=FALSE-----------------------------------------
#  
#  # Calculate differences by patient mapping with the group_map function:
#  aux <- smoothed %>%
#    group_by(id) %>%
#    group_map(~ smooth_diff(.$treatment,.$smoothed_treatment)) %>%
#    bind_rows(.id = "group_id") %>%
#    data.frame
#  
#  # Format output and filter global, exposure period:
#  aux <- aux %>%
#    mutate(percentage_of_change = round(proportion_of_change*100,2)) %>%
#    filter(type%in%c('Global','Exposure period')) %>%
#    mutate(type = factor(type,levels=c('Global','Exposure period'),
#                         labels=c('total_change','exposure_change')))
#  # add 'id' and reshape:
#  aux <- aux %>%
#    left_join(data.frame(id=unique(smoothed$id),group_id = as.character(seq(1,n_distinct(smoothed$id))))) %>%
#    reshape2::dcast(id~type,value.var='percentage_of_change')
#  
#  # join to deparsed_smoothed dataframe:
#  deparsed_smoothed <- left_join(
#    deparsed_smoothed,
#    aux
#  )
#  
#  
#  

## ----parallel32, echo=TRUE,eval=FALSE-----------------------------------------
#  # close sockets:
#  close(pb)
#  snow::stopCluster(cl)
#  
#  # Time to finish the process:
#  t0 = Sys.time() - s0
#  cat("\n The process finished in", round(t0), units(t0))

## ----parallel4, echo=TRUE,eval=FALSE------------------------------------------
#  # Import and combine all chunks into a single data.frame:
#  rds_files <- list.files(tmp.path, pattern = "chunk_", full.names = TRUE)
#  all_chunks <- bind_rows(lapply(rds_files, readRDS))

Try the smoothy package in your browser

Any scripts or data that you put into this service are public.

smoothy documentation built on Aug. 11, 2023, 9:06 a.m.