# smooth.process2: smooth process for prescription or dispensations --------
#' @param data: Input data.frame
#' @param Wt: smooth window in days
#' @param st: minimum splitting days between prescriptions or dispensations
#' @param n.cores: Number of cores tu use during parallelization
#' @param chunks: Number of chunks during parallelization
smooth.process2 = function(data,Wt=30,st=60,
n.cores=parallel::detectCores() - 1,
chunks=NULL){
options(warn=-1)
# - Individuals to process and Window parameter_
id = unique(data$idp)
Wt = Wt
st = st
start = min(data$dat)
end = max(data$dbaixa)
seq = seq(start,end,'day')
# - Drugs to smooth_
select.drugs = colnames(data)[(grep('drugs',colnames(data))+1):ncol(data)]
# - Configure parallelization_
niter <- n_distinct(id) # iterations
n.cores = n.cores
if(!is.null(chunks)){
inds <- split(seq_len(niter), sort(rep_len(seq_len(chunks), niter))) # - Create chunks
# - smooth parallelized by individual_
s = Sys.time()
sdata = foreach(l = seq_along(inds),
.packages = c('Kendall','UEMR','tidyverse','data.table','anytime','dplyr','pbmcapply'),
.combine = 'rbind.data.frame',
.multicombine = F) %dopar% {
ab1 <- pbmcapply::pbmclapply(inds[[l]], function(i) {
smooth.algorithm2 = function(x,select.drugs,Wt,st,id){
MaxTable <- function(x){
dd <- unique(x) %>% na.omit()
dd[which.max(tabulate(match(x,dd)))]
}
select.drugs = select.drugs
Wt = Wt
st = st
id = id
# Calculem els subsets on suavitzarem:
# - Smooth comença i acaba en els períodes registrats de prescripció/facturació, no es tenen en compte el '0' inicial ni final
# - Es subdivideix l'individu per tractaments a partir del paràmetre st
# - A cada tractament es suavitza sota el paràmetre Wt
# - Si dins d'una finestra, el pattern guanyador és '0' es queda l'observat
aux = x %>% mutate(long = dbaixa-dat+1,
n=1:nrow(.),
s = cumsum(ifelse((drugs=='0' & long>=st)|(drugs=='0' & n==nrow(.)),T,F)),
n=as.numeric(ifelse(drugs=='0' & n==1,F,T)),
s=n+s) %>% select(-n)
v = data.frame(seq=seq,
t=rep(aux$drugs, aux$long),
l=rep(aux$long, aux$long),
s=rep(aux$s, aux$long))
# split dataframe i suavizar en cada subset:
S = split(v,v$s)
# smooth:
df = lapply(1:length(S),function(i){
a = S[[i]] %>% filter(!(t=='0' & l>st)) %>% mutate(t=as.character(t))
if(nrow(a)!=0){
# Most frequent pattern as we move the window:
mcin = lapply(1:length(a$t), function(j) {
res = MaxTable(a[j:(Wt + j),'t'])
if(res=='0'){
res = a[j:(Wt + j),'t']
} else{
res = rep(res, Wt)
}
res = rev(res)
}) %>% do.call(rbind, .)
# most frequent drug each day:
# - create an indicator for all diagonals in the matrix
d <- row(mcin) - col(mcin)
# - use split to group on these values
mcin = split(mcin, d)
# - max value at each diagonal
sm.v = lapply(1:length(mcin),function(k){
MaxTable(mcin[[k]])
}) %>% do.call(c,.)
return(data.frame(seq = a$seq, v = a$t, sm.v = sm.v[1:length(a$t)]))
}
}) %>% bind_rows()
# post-process:
# - add entire study temporal window
df = data.frame(idp = id, seq = seq) %>% left_join(df)
df = df %>% mutate_if(is.factor,as.character)
df[is.na(df)] = '0'
df = df %>% mutate(v = v, sm.v = sm.v,g.sm.drug = rleid(sm.v))
# - aggregate by treatments (including No treatment)
df = df %>% mutate(pchange = 100 - perc(sum(sm.v == v), nrow(df))) %>%
group_by(g.sm.drug) %>%
summarise(idp = first(idp),
dat = first(seq), dbaixa = last(seq), sm.drug = first(sm.v),
pchange = first(pchange)) %>% select(-g.sm.drug) %>%
ungroup
#- columns per drug
df = lapply(1:length(select.drugs), function(s) {
return(data.frame(n = 1:nrow(df),
drug = select.drugs[s],
value = stringr::str_detect(df$sm.drug,
paste0('^',select.drugs[s],'$','|','\\+',
select.drugs[s],'|',select.drugs[s],'\\+'))))
}) %>% bind_rows() %>% dcast(., n ~ drug, value.var = "value") %>%
select(-n) %>% mutate(nd = rowSums(.),
th = ifelse(nd > 1, "Polytherapy",
ifelse(nd == 1, "Monotherapy",0))) %>%
bind_cols(df, .)
# - % of change per drug
t.agr = lapply(1:length(select.drugs),function(i){
nvar = select.drugs[i]
a = df %>% mutate(long = dbaixa-dat+1) %>% data.frame
b = aux %>% mutate(long = dbaixa-dat+1)
gplot = data.frame(idp = first(aux$idp),agr = nvar,
fae = rep(as.numeric(b[,nvar]),b$long),
fas= rep(as.numeric(a[,nvar]),a$long))
gplot = gplot %>% mutate(pchange = 100 - perc(sum(fae == fas),nrow(gplot)))
gplot$pchange = ifelse(is.nan(gplot$pchange),0,gplot$pchange)
gplot = gplot %>% group_by(idp) %>% summarise_all(funs(first)) %>% select(idp,agr,pchange)
return(gplot)
}) %>% bind_rows()
t.agr = t.agr %>% dcast(.,idp~agr,value.var='pchange')
colnames(t.agr)[-1] = paste0('pc.',colnames(t.agr)[-1])
# Return smoothed and processed individual:
df = left_join(df,t.agr) %>% arrange(dat) %>% select(idp,dat,dbaixa,sm.drug,select.drugs,everything()) %>% data.frame
return(df)
}
aux = data %>% filter(idp==id[i]) %>% mutate(long = dbaixa-dat+1)
saux = smooth.algorithm2(x=aux,select.drugs = select.drugs,Wt=Wt,id=id[i],st=st)
return(saux)
}, mc.cores = n.cores) %>% bind_rows()
}
e = Sys.time()
t = e-s
cat('\n The smoothing process finished in',round(t),units(t))
} else{
cl <- snow::makeSOCKcluster(n.cores) # make cluster
doSNOW::registerDoSNOW(cl) # register into cluster
# - Progress bar_
pb <- utils::txtProgressBar(min=1, max=niter, style=3)
progress <- function(n) utils::setTxtProgressBar(pb, n)
opts <- list(progress=progress)
# - smooth parallelized by individual_
s = Sys.time()
sdata = foreach(i = 1:niter,
.packages = c('Kendall','UEMR','tidyverse','data.table'),
.options.snow = opts,
.combine = 'rbind.data.frame',
.multicombine = T) %dopar% {
smooth.algorithm2 = function(x,select.drugs,Wt,st,id){
MaxTable <- function(x){
dd <- unique(x) %>% na.omit()
dd[which.max(tabulate(match(x,dd)))]
}
select.drugs = select.drugs
Wt = Wt
st = st
id = id
# Calculem els subsets on suavitzarem:
# - Smooth comença i acaba en els períodes registrats de prescripció/facturació, no es tenen en compte el '0' inicial ni final
# - Es subdivideix l'individu per tractaments a partir del paràmetre st
# - A cada tractament es suavitza sota el paràmetre Wt
# - Si dins d'una finestra, el pattern guanyador és '0' es queda l'observat
aux = x %>% mutate(long = dbaixa-dat+1,
n=1:nrow(.),
s = cumsum(ifelse((drugs=='0' & long>=st)|(drugs=='0' & n==nrow(.)),T,F)),
n=as.numeric(ifelse(drugs=='0' & n==1,F,T)),
s=n+s) %>% select(-n)
v = data.frame(seq=seq,
t=rep(aux$drugs, aux$long),
l=rep(aux$long, aux$long),
s=rep(aux$s, aux$long))
# split dataframe i suavizar en cada subset:
S = split(v,v$s)
# smooth:
df = lapply(1:length(S),function(i){
a = S[[i]] %>% filter(!(t=='0' & l>st)) %>% mutate(t=as.character(t))
if(nrow(a)!=0){
# Most frequent pattern as we move the window:
mcin = lapply(1:length(a$t), function(j) {
res = MaxTable(a[j:(Wt + j),'t'])
if(res=='0'){
res = a[j:(Wt + j),'t']
} else{
res = rep(res, Wt)
}
res = rev(res)
}) %>% do.call(rbind, .)
# most frequent drug each day:
# - create an indicator for all diagonals in the matrix
d <- row(mcin) - col(mcin)
# - use split to group on these values
mcin = split(mcin, d)
# - max value at each diagonal
sm.v = lapply(1:length(mcin),function(k){
MaxTable(mcin[[k]])
}) %>% do.call(c,.)
return(data.frame(seq = a$seq, v = a$t, sm.v = sm.v[1:length(a$t)]))
}
}) %>% bind_rows()
# post-process:
# - add entire study temporal window
df = data.frame(idp = id, seq = seq) %>% left_join(df)
df = df %>% mutate_if(is.factor,as.character)
df[is.na(df)] = '0'
df = df %>% mutate(v = v, sm.v = sm.v,g.sm.drug = rleid(sm.v))
# - aggregate by treatments (including No treatment)
df = df %>% mutate(pchange = 100 - perc(sum(sm.v == v), nrow(df))) %>%
group_by(g.sm.drug) %>%
summarise(idp = first(idp),
dat = first(seq), dbaixa = last(seq), sm.drug = first(sm.v),
pchange = first(pchange)) %>% select(-g.sm.drug) %>%
ungroup
#- columns per drug
df = lapply(1:length(select.drugs), function(s) {
return(data.frame(n = 1:nrow(df),
drug = select.drugs[s],
value = stringr::str_detect(df$sm.drug,
paste0('^',select.drugs[s],'$','|','\\+',
select.drugs[s],'|',select.drugs[s],'\\+'))))
}) %>% bind_rows() %>% dcast(., n ~ drug, value.var = "value") %>%
select(-n) %>% mutate(nd = rowSums(.),
th = ifelse(nd > 1, "Polytherapy",
ifelse(nd == 1, "Monotherapy",0))) %>%
bind_cols(df, .)
# - % of change per drug
t.agr = lapply(1:length(select.drugs),function(i){
nvar = select.drugs[i]
a = df %>% mutate(long = dbaixa-dat+1) %>% data.frame
b = aux %>% mutate(long = dbaixa-dat+1)
gplot = data.frame(idp = first(aux$idp),agr = nvar,
fae = rep(as.numeric(b[,nvar]),b$long),
fas= rep(as.numeric(a[,nvar]),a$long))
gplot = gplot %>% mutate(pchange = 100 - perc(sum(fae == fas),nrow(gplot)))
gplot$pchange = ifelse(is.nan(gplot$pchange),0,gplot$pchange)
gplot = gplot %>% group_by(idp) %>% summarise_all(funs(first)) %>% select(idp,agr,pchange)
return(gplot)
}) %>% bind_rows()
t.agr = t.agr %>% dcast(.,idp~agr,value.var='pchange')
colnames(t.agr)[-1] = paste0('pc.',colnames(t.agr)[-1])
# Return smoothed and processed individual:
df = left_join(df,t.agr) %>% arrange(dat) %>% select(idp,dat,dbaixa,sm.drug,select.drugs,everything()) %>% data.frame
return(df)
}
aux = data %>% filter(idp==id[i]) %>% mutate(long = dbaixa-dat+1)
saux = smooth.algorithm2(x=aux,select.drugs = select.drugs,Wt=Wt,id=id[i],st=st)
return(saux)
}
e = Sys.time()
t = e-s
cat('\n The smoothing process finished in',round(t),units(t))
close(pb) # close the cluster
snow::stopCluster(cl)
}
# return
return(sdata %>% select(idp,dat,dbaixa,nd,th,drugs = sm.drug,everything()))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.