Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE)
## ----echo=TRUE,eval=F---------------------------------------------------------
# install.packages('smoothy')
## ----setup, warning=FALSE,echo=T----------------------------------------------
library(smoothy)
library(tidyr)
library(knitr)
## ----data---------------------------------------------------------------------
data(drugstreatment)
## ----data_tab, echo=FALSE, results='asis'-------------------------------------
kable(head(drugstreatment),format='markdown')
## ----single-------------------------------------------------------------------
my_data <- dplyr::filter(drugstreatment, id == "62eb1ebd-1b49-4ba7-9af9-ec0c4ada0956")
## ----parse--------------------------------------------------------------------
structured_df <- smooth_parse(
id = my_data$id,
start_date = my_data$start_date,
end_date = my_data$end_date,
drug = my_data$drug,
study_from = "1970-01-01",
study_to = "1975-01-01"
)
## ----parse_tab, echo=FALSE, results='asis'------------------------------------
kable(head(structured_df),format='markdown')
## ----smooth-------------------------------------------------------------------
smoothed <- smooth_algorithm(
id = structured_df$id,
treatment = structured_df$treatment,
day = structured_df$day,
N = structured_df$N,
width = 61
)
## ----smooth_tab, echo=FALSE, results='asis'-----------------------------------
kable(head(smoothed),format='markdown')
## ----deparse------------------------------------------------------------------
deparsed.sm <- smooth_deparse(smoothed$id, smoothed$day, smoothed$smoothed_treatment)
## ----deparsed_tab, echo=FALSE, results='asis'---------------------------------
knitr::kable(deparsed.sm,format='markdown')
## ----deparse0-----------------------------------------------------------------
deparsed <- smooth_deparse(smoothed$id, smoothed$day, smoothed$treatment)
## ----count diff, eval=FALSE---------------------------------------------------
# smooth_diff(treatment = smoothed$treatment, smoothed_treatment = smoothed$smoothed_treatment)
## ----count diff_tab, echo=FALSE, results='asis'-------------------------------
kable(smooth_diff(treatment = smoothed$treatment, smoothed_treatment = smoothed$smoothed_treatment),format='markdown')
## ----viz, fig.show='hold',eval=T,echo=T,fig.align='center',fig.height=4,fig.width=8----
require(ggplot2)
require(gridExtra)
tts = unique(deparsed$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p = ggplot(deparsed, aes(x = start_date, y = treatment)) + geom_segment(aes(x = start_date,
xend = end_date, y = treatment, yend = treatment), size = 2, alpha = 0.85,
col = 'grey20') + theme_bw() + scale_x_date(date_breaks = '6 months') +
scale_y_discrete(limits = yorder) + theme(axis.text.x = element_text(angle = 60,
hjust = 1), legend.position = "none") + ylab("") + xlab("") +
ggtitle(paste0('Original treatment'))
tts = unique(deparsed.sm$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p.sm = ggplot(deparsed.sm, aes(x = start_date, y = treatment)) + geom_segment(aes(x = start_date,
xend = end_date, y = treatment, yend = treatment), size = 2, alpha = 0.85,
col = 'grey20') + theme_bw() + scale_x_date(date_breaks = '6 months') +
scale_y_discrete(limits = yorder) + theme(axis.text.x = element_text(angle = 60,
hjust = 1), legend.position = "none") + ylab("") + xlab("") +
ggtitle(paste0('Smoothed treatment'))
grid.arrange(p,p.sm,ncol=1)
## ----window_size--------------------------------------------------------------
my_data <- dplyr::filter(drugstreatment, id == '25094328-3819-4061-941d-191c4e0bc939')
structured_df <- smooth_parse(
id = my_data$id,
start_date = my_data$start_date,
end_date = my_data$end_date,
drug = my_data$drug,
study_from = "1970-01-01",
study_to = "1975-01-01"
)
# smooth
smoothed <- smooth_algorithm(id = structured_df$id,
treatment = structured_df$treatment,
day = structured_df$day,
N = structured_df$N,
width = 31)
smoothed45 <- smooth_algorithm(id = structured_df$id,
treatment = structured_df$treatment,
day = structured_df$day,
N = structured_df$N,
width = 45)
smoothed61 <- smooth_algorithm(id = structured_df$id,
treatment = structured_df$treatment,
day = structured_df$day,
N = structured_df$N,
width = 61)
deparsed <- smooth_deparse(smoothed$id, smoothed$day, smoothed$treatment)
deparsed.sm <- smooth_deparse(smoothed$id, smoothed$day, smoothed$smoothed_treatment)
deparsed.sm45 <- smooth_deparse(smoothed45$id, smoothed45$day, smoothed45$smoothed_treatment)
deparsed.sm61 <- smooth_deparse(smoothed61$id, smoothed61$day, smoothed61$smoothed_treatment)
## ----viz_window, fig.show='hold',eval=T,echo=F,fig.align='center',fig.height=8,fig.width=8----
## plot:
tts = unique(deparsed$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p = ggplot(deparsed, aes(x = start_date, y = treatment)) +
geom_segment(aes(x = start_date,
xend = end_date, y = treatment, yend = treatment), linewidth = 2, alpha = 0.85,
col = 'grey20') + theme_bw() + scale_x_date(date_breaks = '6 months') +
scale_y_discrete(limits = yorder) +
theme(axis.text.x = element_text(angle = 60, hjust = 1), legend.position = "none") +
ylab("") + xlab("") +
ggtitle(paste0('Original raw data treatment'))
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
tts = unique(deparsed.sm$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p.sm = ggplot(deparsed.sm, aes(x = start_date, y = treatment)) +
geom_segment(aes(x = start_date,
xend = end_date, y = treatment, yend = treatment), linewidth = 2, alpha = 0.85, col = 'grey20') +
theme_bw() + scale_x_date(date_breaks = '6 months') +
scale_y_discrete(limits = yorder) +
theme(axis.text.x = element_text(angle = 60, hjust = 1), legend.position = "none") +
ylab("") + xlab("") +
ggtitle(paste0('Smoothed treatment (31 days window)'))
tts = unique(deparsed.sm45$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p.sm45 = ggplot(deparsed.sm45, aes(x = start_date, y = treatment)) +
geom_segment(aes(x = start_date,
xend = end_date, y = treatment, yend = treatment), linewidth = 2, alpha = 0.85, col = 'grey20') +
theme_bw() + scale_x_date(date_breaks = '6 months') +
scale_y_discrete(limits = yorder) +
theme(axis.text.x = element_text(angle = 60, hjust = 1), legend.position = "none") +
ylab("") + xlab("") +
ggtitle(paste0('Smoothed treatment (45 days window)'))
tts = unique(deparsed.sm61$treatment)
tts = tts[tts!='None']
yorder = c('None',tts[order(nchar(tts), tts)])
p.sm61 = ggplot(deparsed.sm61, aes(x = start_date, y = treatment)) +
geom_segment(aes(x = start_date,
xend = end_date, y = treatment, yend = treatment), linewidth = 2, alpha = 0.85, col = 'grey20') +
theme_bw() + scale_x_date(date_breaks = '6 months') +
scale_y_discrete(limits = yorder) +
theme(axis.text.x = element_text(angle = 60, hjust = 1), legend.position = "none") +
ylab("") + xlab("") +
ggtitle(paste0('Smoothed treatment (61 days window)'))
gridExtra::grid.arrange(p,p.sm,p.sm45,p.sm61,ncol=1)
## ----echo=TRUE,eval=FALSE-----------------------------------------------------
# library(doParallel)
# library(foreach)
# library(snow)
# library(doSNOW)
## ----parallel1, echo=TRUE,eval=FALSE------------------------------------------
# data = drugstreatment
#
# # starting date:
# s0 = Sys.time()
#
# # create chunks:
# chunksize = 50
# niter <- n_distinct(drugs$id)
# chunks <- ceiling(niter/chunksize)
# inds <- split(seq_len(niter), sort(rep_len(seq_len(chunks),
# niter)))
## ----parallel2, echo=TRUE,eval=FALSE------------------------------------------
#
# # parallel - cores and socket:
# n.cores <- 3
# cl <- snow::makeSOCKcluster(n.cores)
# doSNOW::registerDoSNOW(cl)
#
# # progress bar:
# pb <- utils::txtProgressBar(min = 1, max = chunks, style = 3)
# progress <- function(n) utils::setTxtProgressBar(pb,n)
# opts <- list(progress = progress)
#
## ----parallel3, echo=TRUE,eval=FALSE------------------------------------------
#
# # path to temporal directory:
# tmp.path = tempdir()
# diff=FALSE
#
# l <- foreach(c = 1:chunks,
# .packages = c("Kendall", "smoothy", "data.table", "anytime", "dplyr"),
# .options.snow = opts,
# .multicombine = F) %dopar% {
#
# chunk.id <- unique(data$id)[inds[[c]]]
#
# # run the workflow in each individual from the chunk:
# df <- filter(data, id %in% chunk.id)
#
# # 1) parse data:
# structured_df <- smooth_parse(
# id = df$id,
# start_date = df$start_date,
# end_date = df$end_date,
# drug = df$drug,
# study_from = "1970-01-01",
# study_to = "1975-01-01"
# )
#
# # 2) smooth algorithm:
# width <- 61
#
# smoothed <- smooth_algorithm(
# id = structured_df$id,
# treatment = structured_df$treatment,
# day = structured_df$day,
# N = structured_df$N,
# width = width
# )
#
# # 3) deparse data (original format):
# deparsed_smoothed <- smooth_deparse(
# smoothed$id,
# smoothed$day,
# smoothed$smoothed_treatment
# )
#
#
# # 4) Per patient changes due to smooth algorithm:
# if(diff){
#
# # Calculate differences by patient mapping with the group_map function:
# df <- smoothed %>%
# group_by(id) %>%
# group_map(~ smooth_diff(.$treatment,.$smoothed_treatment)) %>%
# bind_rows(.id = "group_id") %>%
# data.frame
#
# # Format output and filter global, exposure period:
# df <- df %>%
# mutate(percentage_of_change = round(proportion_of_change*100,2)) %>%
# filter(type%in%c('Global','Exposure period')) %>%
# mutate(type = factor(type,levels=c('Global','Exposure period'),
# labels=c('total_change','exposure_change')))
# # add 'id' and reshape:
# df <- df %>%
# left_join(data.frame(id=unique(smoothed$id),group_id = as.character(seq(1,n_distinct(smoothed$id))))) %>%
# reshape2::dcast(id~type,value.var='percentage_of_change')
#
# # attach to deparsed_smoothed dataframe:
# deparsed_smoothed <- left_join(
# deparsed_smoothed,
# df
# )
#
# rm(df)
# }
#
# # Save chunk output to a temporary folder:
# saveRDS(deparsed_smoothed,paste0(tmp.path,"/chunk_",c,".rds"))
#
# rm(df,structured_df,smoothed,deparsed_smoothed);gc()
#
# }
#
#
## ----parallel31, echo=TRUE,eval=FALSE-----------------------------------------
#
# # Calculate differences by patient mapping with the group_map function:
# aux <- smoothed %>%
# group_by(id) %>%
# group_map(~ smooth_diff(.$treatment,.$smoothed_treatment)) %>%
# bind_rows(.id = "group_id") %>%
# data.frame
#
# # Format output and filter global, exposure period:
# aux <- aux %>%
# mutate(percentage_of_change = round(proportion_of_change*100,2)) %>%
# filter(type%in%c('Global','Exposure period')) %>%
# mutate(type = factor(type,levels=c('Global','Exposure period'),
# labels=c('total_change','exposure_change')))
# # add 'id' and reshape:
# aux <- aux %>%
# left_join(data.frame(id=unique(smoothed$id),group_id = as.character(seq(1,n_distinct(smoothed$id))))) %>%
# reshape2::dcast(id~type,value.var='percentage_of_change')
#
# # join to deparsed_smoothed dataframe:
# deparsed_smoothed <- left_join(
# deparsed_smoothed,
# aux
# )
#
#
#
## ----parallel32, echo=TRUE,eval=FALSE-----------------------------------------
# # close sockets:
# close(pb)
# snow::stopCluster(cl)
#
# # Time to finish the process:
# t0 = Sys.time() - s0
# cat("\n The process finished in", round(t0), units(t0))
## ----parallel4, echo=TRUE,eval=FALSE------------------------------------------
# # Import and combine all chunks into a single data.frame:
# rds_files <- list.files(tmp.path, pattern = "chunk_", full.names = TRUE)
# all_chunks <- bind_rows(lapply(rds_files, readRDS))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.