#' fn_aggre
#'
#' this is a wrapp version of aggregate function in R base package.
#'
#' @author sontron
#' @param data is a data.frame object that need to be processed
#' @param group are the dimension variables
#' @param val are the variables that needed calculated
#' @param subsets are subset of data object
#' @return same as aggregate function
#'
#' @examples
#' fn_aggre(iris,group='Species',val=c('Sepal.Length','Sepal.Width'),fun='mean')
#' fn_aggre(iris,group='Species',val=c('Sepal.Length','Sepal.Width'),fun='function(x)median(x,na.rm=T)')
#' fn_aggre(iris,group='Species',val=c('Sepal.Length','Sepal.Width'),
#' fun='mean',subsets=list(val=c('Species','Sepal.Length'),legal=list(c('setosa','virginica'),
#' c('[0,10]')),method=c('elements','ranges'),mode=c('character','numeric')))
#'
#'
#' @export
fn_aggre<-function(data=NA,group=NA,val=NA,fun='mean',subsets=NA){
if(all(is.na(subsets))) {
data } else {
legal_data(data,val=subsets$val,legal=subsets$legal,method=subsets$method,mode=subsets$mode)->ind
data<-data[ind,]
}
if(fun=='length') fun=function(x)sum(!is.na(x))
df.val<-data[,val,drop=F]
if(length(group)==1) df.by<-list(data[,group]) else df.by<-as.list(data[,group])
aggregate(x=df.val,by=df.by,FUN=eval(parse(text=fun)))
}
#' myApply
#' a little change to apply. avoid error for apply function to a data.frame that only contain one var.
#'
#' @author sontron
#'
#' @export
myApply<-function(data,vars,MARGIN,FUN,na.rm=T,...){
stopifnot(class(data)%in%c('data.frame','matrix')&&all(is.element(vars,names(data))))
#if(length(vars)==1) {
# apply(as.matrix(data[,vars]),MARGIN,FUN,...)->resmyApply
#} else {
# apply(data[,vars],MARGIN,FUN,...)->resmyApply
#}
# apply(as.matrix(data[,vars],ncol=length(vars)),MARGIN,FUN,...)->resmyApply
apply(data[,vars,drop=F],MARGIN,FUN,...)->resmyApply
return(resmyApply)
}
#' fn_discrete
#'
#' fn_discrete is a function simlar with legal_set, but return a regrouped value from input x.
#'
#' @author sontron
#' @param x is a vector which could be numeric, datetime or character
#' @param groups is simlar with L in legal_set, but different in that for elements and substrs it should be a list
#' @param Labels defines the labels for the returned value
#' @param method could be ranges, substrs and elements
#' @param mode could be numeric, character or datetime
#' @return a regrouped vector
#'
#' @examples
#' x=rnorm(100)
#' fn_discrete(x,groups=c('(-Inf,0]','(0,Inf)'),Labels=c('negative_value','positive_value'),method='ranges',mode='numeric')
#' x=sample(letters[1:5],100,rep=T)
#' fn_discrete(x,groups=list(c('a','b'),c('c','d','e')),Labels=c('a-b','c-e'),method='elements',mode='character')
#' x=c('google','goodbye','doodle','doodoge','go!','bad')
#' fn_discrete(x,groups=list(c('goo'),c('doo')),Labels=c('include_goo','include_doo'),method='substrs',mode='character')
#'
#'@export
fn_discrete<-function(x,groups,Labels=NULL,na.val='others',method=c('ranges','substrs','elements')[1],mode=c('numeric','character','datetime')[1]){
stopifnot(method%in%c('elements','substrs','ranges')|mode%in%c('numeric','character','datetime'))
require('stringi')
## numeric and datetime value within a given range
if(method=='ranges'){
len<-length(groups)
sapply(1:len,function(i){
if(grepl(',',groups[[i]])){
unlist(strsplit(groups[[i]],",",fixed=T))->l
if(mode=='numeric'){
if(grepl("(",l[1],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1]))->range_l;x>range_l->ind.l}
if(grepl(")",l[2],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2]))->range_r;x<range_r->ind.r}
if(grepl("[",l[1],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1]))->range_l;x>=range_l->ind.l}
if(grepl("]",l[2],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2]))->range_r;x<=range_r->ind.r}
}
if(mode=='datetime'){
as.POSIXct(x)->x
if(grepl("(",l[1],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1])))->range_l;x>range_l->ind.l}
if(grepl(")",l[2],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2])))->range_r;x<range_r->ind.r}
if(grepl("[",l[1],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1])))->range_l;x>=range_l->ind.l}
if(grepl("]",l[2],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2])))->range_r;x<=range_r->ind.r}
}
ifelse(ind.l&ind.r,T,F)->res
} else {
if(mode=='numeric'){
gsub("(^[[:punct:]]|[[:punct:]]$)","",L[i])->l
as.numeric(l)->range_p
}
if(mode=='datetime'){
gsub("(^[[:punct:]]|[[:punct:]]$)","",L[i])->l
as.POSIXct(x)->x
as.numeric(as.POSIXct(l))->range_p
}
ifelse(x==range_p,T,F)->res
}
return(res)
})->res
#apply(matrix(res,nc=len),1,any)->res
}
# discrete numeric values or characters within a given set.
if(method=='elements'){
length(groups)->len_ele
sapply(1:len_ele,function(i){
ifelse(is.element(x,groups[[i]]),T,F)->res
return(res)
})->res
}
# subset of character strings within a given set.
if(method=='substrs'){
length(groups)->len_substr
sapply(1:len_substr,function(i){
sapply(groups[[i]],function(j){
stri_detect_fixed(x,j)->res
return(res)
})->res
apply(as.matrix(res),1,any)->res
return(res)
})->res
}
apply(matrix(res,nc=length(groups)),1,function(x){
which(x)->res2
if(length(res2)!=0) res2 else res2<-NA
return(res2)
})->result
if(is.null(Labels)) Labels<-paste('V',1:length(groups),sep='_')
result.final<-Labels[result]
result.final[is.na(result.final)]<-na.val
return(result.final)
}
#' myDesc
#'
#' a function provide description statistics.
#'
#' @export
myDesc<-function(data,xvars,varType=c('numeric','character','factor','integer','ordered')[1],Digits=4,nameX='x',seed=123,tabSort=TRUE){
if(is.character(data)) data=eval(as.name(data))
as.data.frame(data[,xvars])->dt
nameX=xvars
varType=class(data[,xvars])[1]
x=data[,xvars]
names(dt)<-nameX
if(varType%in%c('numeric','integer')){
summary(x)->resNum
if(length(resNum)==6) {
names(resNum)<-c('Min','0.25Qu.','Median','Mean','0.75Qu.','Max')
resNum[7]<-0
names(resNum)[7]<-'NAs'
}
if(length(resNum)==7) {
names(resNum)<-c('Min','0.25Qu.','Median','Mean','0.75Qu.','Max','NAs')
}
sdNum<-sd(x,na.rm=T)
round(resNum,Digits)->resNumRd
round(sdNum,Digits)->sdNumRd
desNormalNum<-paste(resNumRd[4],'±',sdNumRd,sep='')
names(desNormalNum)<-'NormalDist.'
desNonnormalNum<-paste(resNumRd[3],'[',resNumRd[2],',',resNumRd[5],']',sep='')
names(desNonnormalNum)<-'NonnormalDist.'
as.data.frame(c(resNumRd,desNormalNum,desNonnormalNum))->resTab
# if(length(x)<5000) {
# shapiro.test(x)->resShap
# } else {
# set.seed(seed)
# shapiro.test(x[sample(1:length(x),5000,rep=F)])->resShap
# }
resShap<-ksnormTest(x)
resTab<-as.data.frame(resTab)
names(resTab)<-'DescRes'
#resDesc<-list(resTabDesc=resTab,resShapiroTest=resShap)
resDesc<-list(resTabDesc=resTab)
graphDesc<-ggplot(dt,aes(x))+geom_histogram(color='white',fill='steelblue')+labs(x=nameX)+theme_bw()
}
if(varType%in%c('character','factor','ordered')){
table(x,useNA = 'ifany')->tabChar
names(tabChar)[which(is.na(names(tabChar)))]<-'NAs'
resTab<-cbind(tabChar,round(tabChar/sum(tabChar,na.rm=T),Digits))
as.data.frame(resTab)->resTab
names(resTab)<-c('Freq','Perc')
resDesc<-list(resTabDesc=resTab)
graphDesc<-ggplot(dt,aes(x,fill=x))+geom_bar(width=0.35,color='white')+labs(x=nameX)+theme_bw()
}
resDescLst<-list(resDesc=resDesc,graphDesc=graphDesc)
return(resDescLst)
}
#' legal_set
#'
#' This function return logic value that indicate which x values are in legal set that is set by L
#'
#' @author sontron
#' @param x is a vector which could be numeric, character or datetime type
#' @param L is a expression indicate legal sets for x
#' @param method specified the method should be used
#' @param mode is the mode of x
#'
#' @return logic value which could be T,F,NA
#'
#' @examples
#' x=rnorm(10)
#' L="[0,Inf]"
#' legal_set(x,L,method='ranges',mode='numeric')
#' x.char=c('a','b','cd')
#' L.char=c('a','c')
#' legal_set(x=x.char,L=L.char,method='elements',mode='character')
#' legal_set(x=x.char,L=L.char,method='substrs',mode='character')
#' Date=c('2016-1-1 14:24:00',NA,'2016-6-1','2016-12-31 24:00:00')
#' L.date='[2016-1-1,2016-12-31)'
#' legal_set(x=Date,L=L.date,method='ranges',mode='datetime')
#'
#' @note the expression of L is flexible,like "(0,3)","[7,11]","[0,Inf)",c('[-1,0]','(1,10)'),
#' "[2013-1-2,2-13-10-11]",c('apple','orange')
#'
#' @export
legal_set<-function(x,L,method=c('ranges','substrs','elements')[1],mode=c('numeric','character','datetime')[1],type=c('fixed','regex')[1]){
stopifnot(method%in%c('elements','substrs','ranges')|mode%in%c('numeric','character','datetime'))
require('stringi')
## numeric and datetime value within a given range
if(method=='ranges'){
len<-length(L)
sapply(1:len,function(i){
if(grepl(',',L[i])){
unlist(strsplit(L[i],",",fixed=T))->l
if(mode=='numeric'){
if(grepl("(",l[1],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1]))->range_l;x>range_l->ind.l}
if(grepl(")",l[2],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2]))->range_r;x<range_r->ind.r}
if(grepl("[",l[1],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1]))->range_l;x>=range_l->ind.l}
if(grepl("]",l[2],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2]))->range_r;x<=range_r->ind.r}
}
if(mode=='datetime'){
as.POSIXct(x)->x
if(grepl("(",l[1],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1])))->range_l;x>range_l->ind.l}
if(grepl(")",l[2],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2])))->range_r;x<range_r->ind.r}
if(grepl("[",l[1],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1])))->range_l;x>=range_l->ind.l}
if(grepl("]",l[2],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2])))->range_r;x<=range_r->ind.r}
}
ifelse(ind.l&ind.r,T,F)->res
} else {
if(mode=='numeric'){
gsub("(^[[:punct:]]|[[:punct:]]$)","",L[i])->l
as.numeric(l)->range_p
}
if(mode=='datetime'){
gsub("(^[[:punct:]]|[[:punct:]]$)","",L[i])->l
as.POSIXct(x)->x
as.numeric(as.POSIXct(l))->range_p
}
ifelse(x==range_p,T,F)->res
}
return(res)
})->res
apply(matrix(res,nc=len),1,any)->res
}
# discrete numeric values or characters within a given set.
if(method=='elements'){
ifelse(is.element(x,L),T,F)->res
}
# subset of character strings within a given set.
if(method=='substrs'){
#paste(L,collapse=';')->pattern
len<-length(L)
if(type=='fixed'){
sapply(L,function(i){
stri_detect_fixed(x,i)->res
return(res)
})->res
}
if(type=='regex'){
sapply(L,function(i){
stri_detect_regex(x,i)->res
return(res)
})->res
}
apply(matrix(res,nc=len),1,any)->res
}
return(res)
}
#' legal_data
#'
#' can be used for data.frame object
#'
#' @author sontron
#' @param data is a data.frame object
#' @param val is a character vector to specified the variables in data. It's same as x in legal_set.
#' @param legal is a list to specified legal sets for each val. It's same as L in legal_set
#' @param method is a character vector specified methods for each variable in val param.
#' @param mode is a character vector specified modes for each variable in val.
#'
#' @return logical value such as T,F,NA.
#'
#' @examples
#' legal_data(data=iris,val=c('Species','Sepal.Length'),
#' legal=list(c('setosa','versicolor'),c('[3,6]')),method=c('elements','ranges'),
#' mode=c('character','numeric'))
#'
#' @export
legal_data<-function(data,val,legal,method=c('ranges','substrs','elements')[1],mode=c('numeric','character','datetime')[1],type=c('fixed','regex')[1]){
stopifnot(method%in%c('elements','substrs','ranges')|mode%in%c('numeric','character','datetime'))
require('stringi')
sapply(1:length(val),function(i){
x=data[,val[i]]
L=legal[[i]]
method[i]->methodi
mode[i]->modei
## numeric and datetime value within a given range
if(methodi=='ranges'){
len<-length(L)
sapply(1:len,function(i){
if(grepl(',',L)){
unlist(strsplit(L,",",fixed=T))->l
if(modei=='numeric'){
if(grepl("(",l[1],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1]))->range_l;x>range_l->ind.l}
if(grepl(")",l[2],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2]))->range_r;x<range_r->ind.r}
if(grepl("[",l[1],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1]))->range_l;x>=range_l->ind.l}
if(grepl("]",l[2],fixed=T)) {as.numeric(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2]))->range_r;x<=range_r->ind.r}
}
if(modei=='datetime'){
as.POSIXct(x)->x
if(grepl("(",l[1],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1])))->range_l;x>range_l->ind.l}
if(grepl(")",l[2],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2])))->range_r;x<range_r->ind.r}
if(grepl("[",l[1],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[1])))->range_l;x>=range_l->ind.l}
if(grepl("]",l[2],fixed=T)) {as.numeric(as.POSIXct(gsub("(^[[:punct:]]|[[:punct:]]$)","",l[2])))->range_r;x<=range_r->ind.r}
}
ifelse(ind.l&ind.r,T,F)->res
} else {
if(modei=='numeric'){
gsub("(^[[:punct:]]|[[:punct:]]$)","",L[i])->l
as.numeric(l)->range_p
}
if(modei=='datetime'){
gsub("(^[[:punct:]]|[[:punct:]]$)","",L[i])->l
as.POSIXct(x)->x
as.numeric(as.POSIXct(l))->range_p
}
ifelse(x==range_p,T,F)->res
}
return(res)
})->res
apply(matrix(res,nc=len),1,any)->res
}
# discrete numeric values or characters within a given set.
if(methodi=='elements'){
ifelse(is.element(x,L),T,F)->res
}
# subset of character strings within a given set.
if(methodi=='substrs'){
#paste(L,collapse=';')->pattern
len<-length(L)
if(type=='fixed'){
sapply(L,function(i){
stri_detect_fixed(x,i)->res
return(res)
})->res
}
if(type=='regex'){
sapply(L,function(i){
stri_detect_regex(x,i)->res
return(res)
})->res
}
apply(matrix(res,nc=len),1,any)->res
}
return(res)})->res
apply(res,1,all)->result
return(result)
}
#' varClass
#'
#' is a function that determine the variable classes automatically.
#'
#'
#' @export
varClass<-function(data,lenTab=10,thresh=0.8){
t(sapply(data,function(x){
if(!is.element(class(x)[1],c('character','factor','ordered'))){
if(length(table(x))<=lenTab) {mode='char';descriptive='x'} else {mode='num';descriptive='x'}
} else {
if(sum(!is.na(as.numeric(as.vector(x))))/sum(!is.na(x))>=thresh){
if(length(table(x))>lenTab) {mode='num';descriptive='x'} else{mode='char';descriptive='x'}
} else {
mode='char'
if(length(table(x))>lenTab){descriptive=''} else{descriptive='x'}
}
}
return(mode)
}))->res
return(res)
}
#' myDescVec
#'
#' a function provide description statistics vor vectors
#'
#' @export
myDescVec<-function(x,varType=c('numeric','character','factor','integer','ordered')[1],Digits=4,nameX='x',seed=123,tabSort=TRUE){
if(is.character(data)) data=eval(as.name(data))
as.data.frame(data[,xvars])->dt
nameX=xvars
varType=class(data[,xvars])[1]
x=data[,xvars]
names(dt)<-nameX
if(varType%in%c('numeric','integer')){
summary(x)->resNum
if(length(resNum)==6) {
names(resNum)<-c('Min','0.25Qu.','Median','Mean','0.75Qu.','Max')
resNum[7]<-0
names(resNum)[7]<-'NAs'
}
if(length(resNum)==7) {
names(resNum)<-c('Min','0.25Qu.','Median','Mean','0.75Qu.','Max','NAs')
}
sdNum<-sd(x,na.rm=T)
round(resNum,Digits)->resNumRd
round(sdNum,Digits)->sdNumRd
desNormalNum<-paste(resNumRd[4],'±',sdNumRd,sep='')
names(desNormalNum)<-'NormalDist.'
desNonnormalNum<-paste(resNumRd[3],'[',resNumRd[2],',',resNumRd[5],']',sep='')
names(desNonnormalNum)<-'NonnormalDist.'
as.data.frame(c(resNumRd,desNormalNum,desNonnormalNum))->resTab
# if(length(x)<5000) {
# shapiro.test(x)->resShap
# } else {
# set.seed(seed)
# shapiro.test(x[sample(1:length(x),5000,rep=F)])->resShap
# }
resShap<-ksnormTest(x)
resTab<-as.data.frame(resTab)
names(resTab)<-'DescRes'
#resDesc<-list(resTabDesc=resTab,resShapiroTest=resShap)
resDesc<-list(resTabDesc=resTab,resShapiroTest=resShap)
graphDesc<-ggplot(dt,aes(x))+geom_histogram(color='white',fill='steelblue')+labs(x=nameX)
}
if(varType%in%c('character','factor','ordered')){
table(x,useNA = 'ifany')->tabChar
names(tabChar)[which(is.na(names(tabChar)))]<-'NAs'
resTab<-cbind(tabChar,round(tabChar/sum(tabChar,na.rm=T),Digits))
as.data.frame(resTab)->resTab
names(resTab)<-c('Freq','Perc')
resDesc<-list(resTabDesc=resTab)
graphDesc<-ggplot(dt,aes(x))+geom_bar(width=0.35,color='white')+labs(x=nameX)
}
resDescLst<-list(resDesc=resDesc,graphDesc=graphDesc)
return(resDescLst)
}
#' myhTest
#'
#' is a function of basic hTest collection based on data.frame
#'
#' @export
myhTest<-function(data,xvars,yvars='',alter=c('two.sided','less','greater')[1],paired=FALSE,confLevel=0.95,nullHyp=0,normalSampleSize=100) {
require(vcdExtra)
require(fBasics)
if(is.character(data)) data=eval(as.name(data))
nrow(data)->obsNo
if(yvars==''){
dt<-data.frame(x=data[,xvars[1]],stringsAsFactors = F)
#names(dt)[1]<-'x'
nameX<-xvars[1]
if(class(dt$x)[1]%in%c('character','ordered')){
table(dt$x)->Tab
sum(Tab,na.rm=T)->sumTab
myDesc(data=dt,xvars='x',varType='character')$resTabDesc->DescResult
hTestRes<-list(DescResult=DescResult,hTestResult=prop.test(as.numeric(Tab),rep(sumTab,length(Tab)),alternative=alter,conf.level=confLevel))
hTestGraph<-ggplot(dt,aes(x))+geom_bar(width=0.35,color='white')+labs(x=nameX)+theme_bw()
} else {
#pvalShapiro<-ifelse(obsNo>5000,shapiro.test(dt$x[sample(1:obsNo,5000)])$p.value,shapiro.test(dt$x)$p.value) ## 采用ksnormTest 见下行
pvalShapiro<-ksnormTest(dt$x)@test$p.value[1]
myDesc(data=dt,xvars='x',varType='numeric')$resTabDesc->DescResult
if(pvalShapiro>0.05|obsNo>normalSampleSize){
hTestRes<-list(DescResult=DescResult,hTestResult=t.test(dt$x,alternative=alter,mu=nullHyp,conf.level=confLevel))
} else {
hTestRes<-list(DescResult=DescResult,hTestResult=wilcox.test(dt$x,alternative=alter,mu=nullHyp,conf.level=confLevel))
}
hTestGraph<-ggplot(dt,aes(x))+geom_histogram(color='white',fill='steelblue')+labs(x=nameX)+theme_bw()
}
} else {
data[,c(xvars[1],yvars[1])]->dt
names(dt)<-c('x','y')
xvars[1]->nameX
yvars[1]->nameY
if(class(dt$x)[1]=='numeric'&class(dt$y)[1]=='numeric'){
dt$z<-dt$y-dt$x
nameDiff=paste(nameY,nameX,sep='-')
#pvalShapiroX<-ifelse(obsNo>5000,shapiro.test(dt$x[sample(1:obsNo,5000)])$p.value,shapiro.test(dt$x)$p.value)
#pvalShapiroY<-ifelse(obsNo>5000,shapiro.test(dt$y[sample(1:obsNo,5000)])$p.value,shapiro.test(dt$y)$p.value)
pvalShapiroX<-ksnormTest(dt$x)@test$p.value[1]
pvalShapiroY<-ksnormTest(dt$y)@test$p.value[1]
#pvalShapiroDiff<-ifelse(obsNo>5000,shapiro.test((dt$y-dt$x)[sample(1:obsNo,5000)])$p.value,shapiro.test(dt$y-dt$x)$p.value)
#pvalShapiroDiff<-ksnormTest(dt$y-dt$x)@test@p.value[1]
pvalShapiroDiff<-ksnormTest(dt$z)@test$p.value[1]
if(paired){
if(pvalShapiroDiff>0.05|obsNo>normalSampleSize){
hTestRes<-list(hTestResult=t.test(dt$z,mu=nullHyp,alternative = alter,conf.level=confLevel))
} else {
hTestRes<-list(hTestResult=wilcox.test(dt$z,mu=nullHyp,alternative = alter,conf.level=confLevel))
}
hTestGraph<-ggplot(dt,aes(z))+geom_histogram(color='white',fill='steelblue')+labs(x=nameDiff)+theme_bw()
} else {
if((pvalShapiroX>0.05&pvalShapiroY>0.05)|obsNo>100){
hTestRes<-list(hTestResult=cor.test(dt$x,dt$y,alternative = alter,conf.level = confLevel,method='pearson'))
} else {
hTestRes<-list(hTestResult=cor.test(dt$x,dt$y,alternative = alter,conf.level = confLevel,method='spearman'))
}
hTestGraph<-ggplot(dt,aes(x,y))+geom_point()+geom_smooth(method='lm')+labs(x=nameX,y=nameY)+theme_bw()
}
}
if(class(dt$x)[1]%in%c('character','ordered')&class(dt$y)[1]%in%c('character','ordered')){
table(dt$x,dt$y)->tab
if(paired){
hTestRes<-list(DescResult=tab,hTestResult=mcnemar.test(tab))
} else {
chisq.test(tab)->chisqTest
as.vector(chisqTest$expected)->chisqExp
if(any(chisqExp<1)||(sum(chisqExp<5)/length(chisqExp))>0.2) {
fisher.test(tab)->fisherTest
if(any(c(class(dt$x)[1],class(dt$y)[1])=='ordered')){
hTestRes<-list(DescResult=tab,chisqTest=chisq.test(tab),fisherTest=fisherTest,CMHtest=CMHtest(tab))
} else {
hTestRes<-list(DescResult=tab,chisqTest=chisq.test(tab),fisherTest=fisherTest)
}
} else {
if(any(c(class(dt$x)[1],class(dt$y)[1])=='ordered')){
hTestRes<-list(DescResult=tab,chisqTest=chisq.test(tab),CMHtest=CMHtest(tab))
} else {
hTestRes<-list(DescResult=tab,chisqTest=chisq.test(tab))
}
#hTestRes<-list(DescResult=tab,chisqTest=chisq.test(tab),CMHtest=CMHtest(tab))
}
}
hTestGraph<-ggplot(dt,aes(x,fill=y))+geom_bar(width=0.35,color='white',position='dodge')+labs(x=nameX)+scale_fill_discrete(nameY)+theme_bw()
}
if(all(c('numeric','character')%in%c(class(dt$x)[1],class(dt$y)[1]))){
which(c(class(dt$x)[1],class(dt$y)[1])=='numeric')->indNum
which(c(class(dt$x)[1],class(dt$y)[1])=='character')->indChar
dt[,c(indNum,indChar)]->dt
names(dt)<-c('x','grp')
tapply(dt$x,dt$grp,function(i)myDesc(data=dt,xvars='x',varType='numeric')$resDesc$resTabDesc)->X
sapply(as.vector(na.omit(unique(dt$grp))),function(i)myDesc(data=dt[dt$grp==i,],xvars='x',varType='numeric')$resDesc$resTabDesc)->X
matrix(nr=9,nc=length(X))->matDesc
for(i in 1:ncol(matDesc)){
matDesc[,i]<-as.character(X[[i]])
}
colnames(matDesc)<-names(X)
row.names(matDesc)<-c('Min','0.25Qu.','Median','Mean','0.75Qu.','Max','NAs','NormalDist.','NonnormalDist.')
as.data.frame(matDesc)->matDesc
c(nameX,nameY)[c(indNum,indChar)]->namesdt
#pvalShapiroX<-ifelse(obsNo>5000,shapiro.test(dt$x[sample(1:obsNo,5000)])$p.value,shapiro.test(dt$x)$p.value)
pvalShapiroX<-ksnormTest(dt$x)@test$p.value[1]
if(pvalShapiroX>0.05|obsNo>normalSampleSize){
if(length(unique(dt$grp))==2){
hTestRes<-list(DescResult=matDesc,hTestResult=t.test(dt$x~dt$grp,alternative=alter,mu=nullHyp,conf.level=confLevel))
} else {
hTestRes<-list(DescResult=matDesc,wholeTest=summary(aov(dt$x~dt$grp,alternative=alter,conf.level=confLevel)),pairedHTest=pairwise.t.test(dt$x,dt$grp,p.adj='bonf',alternative = alter))
}
} else {
if(length(unique(dt$grp))==2){
hTestRes<-list(DescResult=matDesc,hTestResult=wilcox.test(dt$x~dt$grp,alternative=alter,mu=nullHyp,conf.level=confLevel))
} else {
hTestRes<-list(DescResult=matDesc,wholeTest=kruskal.test(dt$x,as.factor(dt$grp),alternative=alter,conf.level=confLevel),pairedHTest=pairwise.wilcox.test(dt$x,dt$grp,p.adj='bonf',alternative = alter))
}
}
hTestGraph<-ggplot(dt,aes(grp,x,fill=grp))+geom_boxplot(width=0.35)+labs(x=namesdt[2],y=namesdt[1])+theme_bw()
}
if(all(c('numeric','ordered')%in%c(class(dt$x)[1],class(dt$y)[1]))){
which(c(class(dt$x)[1],class(dt$y)[1])=='numeric')->indNum
which(c(class(dt$x)[1],class(dt$y)[1])=='ordered')->indOrd
dt[,c(indNum,indOrd)]->dt
names(dt)<-c('x','grp')
as.numeric(dt$grp)->dt$grp
c(nameX,nameY)[c(indNum,indOrd)]->namesdt
#pvalShapiroX<-ifelse(obsNo>5000,shapiro.test(dt$x[sample(1:obsNo,5000)])$p.value,shapiro.test(dt$x)$p.value)
pvalShapiroX<-ksnormTest(dt$x)@test$p.value[1]
hTestRes<-list(hTestResult=cor.test(dt$x,dt$grp,method='spearman',conf.level=confLevel,alternative=alter))
hTestGraph<-ggplot(dt,aes(x,grp))+geom_point()+geom_smooth(method='lm')+labs(x=namesdt[1],y=namesdt[2])+theme_bw()
}
}
return(list(hTestRes=hTestRes,hTestGraph=hTestGraph))
}
#' myGlm
#'
#' is a function of glm models
#'
#' @export
myGlm<-function(Formula,
data,
weightsVar=1,
subset='all',
Family=c('gaussian','binomial','poisson'),
na.action=na.rm,
lower='~1'
){
require(fBasics)
if(is.character(data)) data=eval(as.name(data))
if(subset=='all'){
data<-data
} else {
subset(data,eval(parse(text=subset)))->data
}
yVar=stri_split_fixed(Formula,'~')[[1]][1]
Form1=as.formula(paste('~',stri_split_fixed(Formula,'~')[[1]][2]))
Form2=as.formula(lower)
unique(unlist(stri_split_regex(Formula,'[+*:~, -\\(\\)\\^]')))->varsAll
intersect(names(data),varsAll)->varsAll
if(is.character(Formula)) Formula=as.formula(Formula)
if(weightsVar==1){
data[,varsAll]->dat
na.omit(dat)->dat
Wt=rep(1,nrow(dat))
} else {
data[,c(varsAll,weightsVar)]->dat
na.omit(dat)->dat
Wt=dat[,weightsVar]
}
if(Family=='binomial'){
as.numeric(as.factor(dat[,yVar]))-1->dat[,yVar]
}
glm(Formula,data=dat,family=Family,x=T,y=T,weights=Wt)->fit
step(fit,scope=list(upper=Form1,lower=Form2),trace=F)->fitStep
#graphGlmFull=autoplot(fit)
#graphGlmStep=autoplot(fitStep)
return(list(glmResFull=fit,glmResStep=fitStep))
}
#' myTree
#'
#' is a function of Tree models
#'
#' @export
myTree<-function(Formula,
data,
# weightsVar,
subset='all',
na.action=na.rm,
Minsplit=30,
Minbucket=10,
Mincrit=0.05,
Maxdepth=3,
CP=0.01,
treeMethod=c('ctree','rpart')[1]
){
if(is.character(data)) data=eval(as.name(data))
if(subset=='all'){
data<-data
} else {
subset(data,eval(parse(text=subset)))->data
}
for(i in 1:ncol(data)){
if(class(data[,i])=='character'){
as.factor(data[,i])->data[,i]
}
}
#unique(unlist(stri_split_regex(Formula,'[+*:~(), ]')))->varsAll
unique(unlist(stri_split_regex(Formula,'[+*:~, -\\(\\)\\^]')))->varsAll
intersect(names(data),varsAll)->varsAll2
#setdiff(varsAll,c('Surv',''))->varsAll2
data[,varsAll2]->dt
na.omit(dt)->dt
if(treeMethod=='ctree'){
fitTree<-party:::ctree(as.formula(Formula),data=dt,controls=party:::ctree_control(maxdepth = Maxdepth,minsplit=Minsplit,minbucket = Minbucket,mincriterion = Mincrit))
}
if(treeMethod=='rpart'){
fitTree<-as.party(rpart(as.formula(Formula),data=dt,control=rpart.control(maxdepth = Maxdepth,minsplit=Minsplit,minbucket = Minbucket,cp = CP)))
}
return(fitTree)
}
#' myCox
#'
#' is a function of Coxph models
#'
#' @export
myCox<-function(Formula,
data,
weightsVar=1,
subset='all',
#Family=c('gaussian','binomial','poisson'),
strataVar='1',
#na.action=na.rm,
lower='~1'
){
if(is.character(data)) data=eval(as.name(data))
if(subset=='all'){
data<-data
} else {
subset(data,eval(parse(text=subset)))->data
}
stri_split_fixed(Formula,'~')[[1]][1]->lht
#yVar=stri_split_fixed(Formula,'~')[[1]][1]
Form1=as.formula(paste('~',stri_split_fixed(Formula,'~')[[1]][2]))
Form2=as.formula(lower)
#unique(unlist(stri_split_regex(Formula,'[+*:~(), ]')))->varsAll
unique(unlist(stri_split_regex(Formula,'[+*:~, -\\(\\)\\^]')))->varsAll
#setdiff(varsAll,c('Surv',''))->varsAll2
intersect(names(data),varsAll)->varsAll2
if(strataVar!='1'){
union(varsAll2,strataVar)->varsAll2
}
if(is.character(Formula)) Formula=as.formula(Formula)
if(weightsVar==1){
data[,varsAll2]->dat
na.omit(dat)->dat
Wt=rep(1,nrow(dat))
} else {
data[,c(varsAll2,weightsVar)]->dat
na.omit(dat)->dat
Wt=dat[,weightsVar]
}
coxph(Formula,data=dat,weights=Wt)->fit
step(fit,scope=list(upper=Form1,lower=Form2),trace=F)->fitStep
if(strataVar!='1'){
paste(lht,strataVar,sep='~')->FormulaStrata
as.formula(FormulaStrata)->FormulaStrata
survfit(FormulaStrata,data=dat)->survFitStrata
} else {
survfit(fitStep)->survFitStrata
}
return(list(coxResFull=fit,coxResStep=fitStep,fitStrata=survFitStrata))
}
#' myTable
#'
#' is a function of Coxph models
#'
#' @export
myTable<-function(Formula,data){
if(is.character(data)) data=eval(as.name(data))
stri_split_fixed(Formula,'~')[[1]][1]->lht
stri_split_fixed(Formula,'~')[[1]][-1]->rht
unlist(stri_split_fixed(lht,'+'))->lhtVars
unlist(stri_split_fixed(rht,'+'))->rhtVars
if(length(lhtVars)>1){
apply(data[,lhtVars],1,function(i)paste(i,collapse='_'))->data[,paste(lhtVars,collapse='_')]
as.formula(paste(paste(lhtVars,collapse='_'),rht,sep='~'))->Formula
mytable(Formula,data,method = 1)->res
} else {
if(lhtVars==''){
data$noGroupVar=1
as.formula(paste('noGroupVar',rht,sep='~'))->Formula
mytable(Formula,data,method = 1)->res
} else {
data[,lhtVars]->data[,paste(lhtVars,collapse='_')]
as.formula(paste(paste(lhtVars,collapse='_'),rht,sep='~'))->Formula
mytable(Formula,data,method = 1)->res
}
}
mytable2df(res)->res2
return(res2)
}
#' predictNodes
#'
#' predict the Node ID of tree object
predictNodes<-function (object, newdata, na.action = na.pass) {
where <-
if (missing(newdata))
object$where
else {
if (is.null(attr(newdata, "terms"))) {
Terms <- delete.response(object$terms)
newdata <- model.frame(Terms, newdata, na.action = na.action,
xlev = attr(object, "xlevels"))
if (!is.null(cl <- attr(Terms, "dataClasses")))
.checkMFClasses(cl, newdata, TRUE)
}
rpart:::pred.rpart(object, rpart:::rpart.matrix(newdata))
}
as.integer(row.names(object$frame))[where]
}
#' myImpute
#'
#' is a imputation function
#'
#' @export
myImpute<-function(data,impVars,modelVars,seed=1,treeParams=list(),method=c('sample','pred')[1]){
require(rpart)
require(partykit)
if(is.character(data)) data=eval(as.name(data))
if(missing(impVars)) impVars=names(data)
if(missing(modelVars)) modelVars=names(data)
#if(missing(treeParams)) treeParams=list(minsplit=20,minbucket=7,maxdepth=4,cp=0.01)
set.seed(seed)
unique(c(impVars,modelVars))->Vars
data[,Vars]->dat
for(i in impVars){
if(any(is.na(dat[,i]))) {
paste(i,'~',paste(names(dat)[-which(names(dat)==i)],collapse='+'),sep='')->formula
rpart(formula,data=dat,control=treeParams)->fit
#as.party(fit)->fitParty
if(length(unique(fit$where))==1){
dat[is.na(dat[,i]),i]<-sample(dat[!is.na(dat[,i]),i],length(dat[is.na(dat[,i]),i]),rep=T)
} else {
#fitted(fitParty)[,1]->nodeRaw
as.numeric(row.names(fit$frame)[fit$where])->nodeRaw
dat[!is.na(dat[,i]),i]->iRaw
if(method=='sample'){
predictNodes(fit,newdata=dat[is.na(dat[,i]),])->nodePred
iImp<-numeric(length(nodePred))
for(j in unique(nodePred)){
which(nodePred==j)->ind
sample(iRaw[nodeRaw==j],length(ind),rep=T)->iImp[ind]
}
dat[is.na(dat[,i]),i]<-iImp
}
if(method=='pred'){
if(class(dat[,i])[1]%in%c('character','factor','ordered')){
as.vector(predict(fit,dat[is.na(dat[,i]),],type='class'))->responsePred
} else {
predict(fit,dat[is.na(dat[,i]),],type='vector')->responsePred
}
dat[is.na(dat[,i]),i]<-responsePred
}
}
} else {
dat[,i]->dat[,i]
}
}
data[,Vars]<-dat
return(data)
}
#' myGplt
#'
#' a wrapper function based on ggplot for easy plot
#' @export
myGplt<-function(data,
x,
y='NULL',
size='NULL',
fill='NULL',
color='NULL',
shape='NULL',
alpha='NULL',
facetVar='NULL',
geom=c('box','hist','bar','line','jitter','point','smooth')[1],
labx='x',
laby='y',
title='my Plot',
theme=c('grey','bw','classic','dark')[1],
smoothMethod=c('lm','glm','loess','gam')[1],
barPos=c('stack','dodge')[1],
Bins=2,
# Colour='NULL',
# Fill='NULL',
# Size='NULL',
# Alpha=.5,
Width=.5,
...
){
geom=unlist(stri_split_fixed(geom,';'))
facetVar=unlist(stri_split_fixed(facetVar,';'))
if(is.character(data)) data=eval(as.name(data))
myGeom<-function(geom,...){
switch(geom,
box=geom_boxplot(...,width=Width),
hist=geom_histogram(...,aes(y=..density..),bins = Bins),
bar=geom_bar(...,aes(y=..count..),position=barPos,width=Width),
line=geom_line(...),
jitter=geom_jitter(...),
point=geom_point(...),
smooth=geom_smooth(...,method=smoothMethod)
)
}
P<-"ggplot(data=data,aes_string(x=x,y=y,size=size,fill=fill,color=color,shape=shape,alpha=alpha))"
geoms<-paste(paste('myGeom(',paste("'",geom,"'",sep=''),')',sep=''),collapse='+')
#names(sapply(geom,function(i)myGeom(i)))->geoms
paste(P,geoms,sep='+')->graph
if(!is.null(facetVar)&facetVar!='NULL'){
paste('facet_wrap(~',paste(facetVar,collapse='+'),')',sep='')->facet
paste(graph,facet,sep='+')->graph
}
myTheme<-function(theme){
switch(theme,
bw=theme_bw(),
grey=theme_grey(),
dark=theme_dark(),
classic=theme_classic()
)
}
themes<-paste("myTheme(",paste("'",theme,"'",sep=''),")",sep='')
myLab<-paste("labs(",paste("x=labx","y=laby","title=title",sep=','),")",sep='')
adjTitle<-'theme(plot.title=element_text(hjust=.5))'
paste(graph,themes,myLab,adjTitle,sep='+')->Graph
resGGplot<-eval(parse(text=Graph))
ggplotly(resGGplot)->resPlotly
return(list(resGGplot=resGGplot,resPlotly=resPlotly))
}
#' myGplt2
#'
#' test
#'
#' @export
myGplt2<-function(data,
x,
y='NULL',
size='NULL',
fill='NULL',
color='NULL',
shape='NULL',
alpha='NULL',
facetVar='NULL',
geom=c('box','hist','bar','line','jitter','point','smooth')[1],
labx='x',
laby='y',
title='my Plot',
theme=c('grey','bw','classic','dark')[1],
smoothMethod=c('lm','glm','loess','gam')[1],
barPos=c('stack','dodge')[1],
Bins='NULL',
Colour='NULL',
Fill='NULL',
Size='NULL',
Alpha='NULL',
Width='NULL',
Shape='NULL'
){
if(is.character(data)) data=eval(as.name(data))
geom=unlist(stri_split_fixed(geom,';'))
facetVar=unlist(stri_split_fixed(facetVar,';'))
if(is.character(data)) data=eval(as.name(data))
myGeom<-function(Bins,Colour,Size,Fill,Alpha,Width,Shape,geom){
switch(geom,
box=paste('geom_boxplot(',paste(ifelse(Fill%in%c('NULL',NA,''),'','fill=Fill,'),
ifelse(Width%in%c('NULL',NA,''),'','width=Width,'),
ifelse(Alpha%in%c('NULL',NA,''),'','alpha=Alpha'),
collapse=''
),')',sep=''),
hist=paste('geom_histogram(aes(y=..density..),',paste(ifelse(Fill%in%c('NULL',NA,''),'','fill=Fill,'),
ifelse(Bins%in%c('NULL',NA,''),'','bins=Bins,'),
ifelse(Alpha%in%c('NULL',NA,''),'','alpha=Alpha,'),
ifelse(Colour%in%c('NULL',NA,''),'','color=Colour'),
collapse=''
),')',sep=''),
bar=paste('geom_bar(aes(y=..count..),',paste(ifelse(Fill%in%c('NULL',NA,''),'','fill=Fill,'),
ifelse(Width%in%c('NULL',NA,''),'','width=Width,'),
ifelse(Alpha%in%c('NULL',NA,''),'','alpha=Alpha,'),
'position=barPos',
collapse=''
),')',sep=''),
line=paste('geom_line(',paste(ifelse(Colour%in%c('NULL',NA,''),'','color=Colour'),
collapse=''
),')',sep=''),
jitter=paste('geom_jitter(',paste(ifelse(Colour%in%c('NULL',NA,''),'','color=Colour,'),
ifelse(Alpha%in%c('NULL',NA,''),'','alpha=Alpha'),
collapse=''
),')',sep=''),
point=paste('geom_point(',paste(ifelse(Colour%in%c('NULL',NA,''),'','color=Colour,'),
ifelse(Shape%in%c('NULL',NA,''),'','shape=Shape,'),
ifelse(Size%in%c('NULL',NA,''),'','size=Size,'),
ifelse(Alpha%in%c('NULL',NA,''),'','alpha=Alpha'),
collapse=''
),')',sep=''),
smooth=paste('geom_smooth(method=smoothMethod,',paste(ifelse(Colour%in%c('NULL',NA,''),'','color=Colour,'),
collapse=''
),')',sep='')
)->Geom
stri_replace_all_regex(Geom,'(,\\)|, \\))',')')->Geom
# return(Geom)
return(eval(parse(text=Geom)))
}
P<-"ggplot(data=data,aes_string(x=x,y=y,size=size,fill=fill,color=color,shape=shape,alpha=alpha))"
geoms<-paste(paste('myGeom(Bins=Bins,Colour=Colour,Fill=Fill,Size=Size,Alpha=Alpha,Width=Width,Shape=Shape,',paste("'",geom,"'",sep=''),')',sep=''),collapse='+')
paste(P,geoms,sep='+')->graph
if(!is.null(facetVar)&facetVar!='NULL'){
paste('facet_wrap(~',paste(facetVar,collapse='+'),')',sep='')->facet
paste(graph,facet,sep='+')->graph
}
myTheme<-function(theme){
switch(theme,
bw=theme_bw(),
grey=theme_grey(),
dark=theme_dark(),
classic=theme_classic()
)
}
themes<-paste("myTheme(",paste("'",theme,"'",sep=''),")",sep='')
myLab<-paste("labs(",paste("x=labx","y=laby","title=title",sep=','),")",sep='')
adjTitle<-'theme(plot.title=element_text(hjust=.5))'
paste(graph,themes,myLab,adjTitle,sep='+')->Graph
resGGplot<-eval(parse(text=Graph))
ggplotly(resGGplot)->resPlotly
return(list(resGGplot=resGGplot,resPlotly=resPlotly))
}
#' myProphet
#'
#' is a revised version of prophet that combine some other functions from xts package for dealing data.frame, multiple variables and adding group variables.
#' return the history data and graphs as well as predicted graph using prophet.
#'
#'
#' @export
myProphet<-function(data,
tsVar,
tsFormat='ymd',
measureVars,
groupVars=1,
Period='weeks',
FN,
Cap=-1,
Floor=-1,
Growth='linear',
# changepointRange=.8,
# changepointPriorScale = 0.05,
# mcmcSamples = 0,
# intervalWidth=.8,
H=10,
yearlyS='auto',
dailyS='auto',
weeklyS='auto'
){
require(xts)
require(lubridate)
require(broom)
if(is.character(data)) data=eval(as.name(data))
unlist(stri_split_fixed(measureVars,';'))->measureVars
unlist(stri_split_fixed(groupVars,';'))->groupVars
which(is.na(data[,tsVar]))->ind
if(length(ind)>0){data[-ind,]->dat} else {dat=data}
# dat$cap<-Cap
# dat$floor<-Floor
if(groupVars%in%c('1','','NA','NULL')){
dat[,'tsGroupVar']<-1
} else {
myApply(data = dat,vars = groupVars,MARGIN = 1,FUN = function(x)paste(x,collapse='_'))->dat$tsGroupVar
}
fn_aggre(dat,group=c(tsVar,'tsGroupVar'),val=measureVars,FN)->resTmp
lapply(unique(resTmp[,'tsGroupVar']),function(i){
subset(resTmp,tsGroupVar==i)->resTmpi
resTmpi[,-which(names(resTmpi)=='tsGroupVar')]->resTmpii
parse_date_time(resTmpii[,tsVar],orders=tsFormat)->TS
xts(resTmpii[,-which(names(resTmpii)==tsVar),drop=F],TS)->resTmpii
lapply(resTmpii,function(j){
period.apply(j,endpoints(j,Period),FUN=eval(parse(text=FN)))
})->resTmpj
do.call(merge,resTmpj)->resTmpj
as.data.frame(resTmpj)->resTmpj
resTmpj[,'tsGroupVar']<-i
return(resTmpj)
})->resTmpNew
do.call(rbind,resTmpNew)->resFinal
resFinal[,tsVar]<-row.names(resFinal)
if(groupVars%in%c('1','','NA','NULL')){
resFinal[,-which(names(resFinal)=='tsGroupVar')]->resFinal
melt(resFinal,id.vars=tsVar)->dfGraph
} else {
names(resFinal)[which(names(resFinal)=='tsGroupVar')]<-paste(groupVars,collapse='_')
melt(resFinal,id.vars=c(tsVar,paste(groupVars,collapse='_')))->dfGraph
}
parse_date_time(resFinal[,tsVar],tsFormat)->resFinal[,tsVar]
#dfGraph[,tsVar]<-row.names(dfGraph)
parse_date_time(dfGraph[,tsVar],tsFormat)->dfGraph[,tsVar]
row.names(resFinal)<-NULL
row.names(dfGraph)<-NULL
if(groupVars%in%c('1','','NA','NULL')){
ggplot(dfGraph,aes_string(tsVar,'value',color='variable'))+geom_point()+geom_line()+facet_wrap(~variable,scales='free')->graph
} else {
#ggplot(dfGraph,aes_string(tsVar,'value',color='variable'))+geom_point()+geom_line()+facet_wrap(as.formula(paste('~',paste('variable',paste(groupVars,collapse='_'),sep='+'),sep='')),scales='free')->graph
ggplot(dfGraph,aes_string(tsVar,'value',color=paste(groupVars,collapse='_')))+geom_point()+geom_line()+facet_wrap(~variable,scales='free')->graph
}
if(groupVars%in%c('1','','NA','NULL')){
Lst<-list()
for(i in 1:length(unique(dfGraph$variable))){
unique(dfGraph$variable)[i]->ii
subset(dfGraph,variable==ii)->dfGraphi
which(is.na(dfGraphi[,tsVar]))->ind
#dfGraphi[-ind,]->dfGraphi
#dfGraphi$floor<-Floor
names(dfGraphi)[which(names(dfGraphi)==tsVar)]<-'ds'
names(dfGraphi)[which(names(dfGraphi)=='value')]<-'y'
ifelse(Cap<0,max(dfGraphi$y,na.rm=T),Cap)->dfGraphi$cap
ifelse(Floor<0,min(dfGraphi$y,na.rm=T),Floor)->dfGraphi$floor
prophet(dfGraphi,
growth=Growth,
# changepoint.range=changepointRange,
yearly.seasonality = yearlyS,
daily.seasonality = dailyS,
weekly.seasonality = weeklyS)->m
future<-make_future_dataframe(m,periods=H,freq=stri_replace_last_regex(Period,'[a-z]',''))
# future$cap<-Cap
# future$floor<-Floor
ifelse(Cap<0,max(dfGraphi$y,na.rm=T),Cap)->future$cap
ifelse(Floor<0,min(dfGraphi$y,na.rm=T),Floor)->future$floor
predict(m,future)->Pred
prophet:::df_for_plotting(m,Pred)->dfGraphiNew
# names(dfGraphiNew)[1]<-tsVar
dfGraphiNew[,'variable']<-ii
as.character(dfGraphiNew$variable)->dfGraphiNew$variable
Lst[[i]]<-dfGraphiNew
}
do.call(rbind,Lst)->dfGraphNew
names(dfGraphNew)[1]<-tsVar
if(Growth=='logistic'){
graphProphet<-ggplot(dfGraphNew,aes_string(tsVar,'y'))+
geom_point(size=.75,na.rm=T)+geom_line(aes(y=yhat),color='#0072B2',na.rm=T)+
geom_line(aes(y=floor),linetype='dashed',na.rm=T)+
geom_line(aes(y=cap),linetype='dashed',na.rm=T)+
geom_ribbon(aes(ymin=yhat_lower,ymax=yhat_upper),alpha=.2,fill='#0072B2',na.rm=T)+
facet_wrap(~variable,scale='free')
} else {
graphProphet<-ggplot(dfGraphNew,aes_string(tsVar,'y'))+
geom_point(size=.75,na.rm=T)+geom_line(aes(y=yhat),color='#0072B2',na.rm=T)+
geom_ribbon(aes(ymin=yhat_lower,ymax=yhat_upper),alpha=.2,fill='#0072B2',na.rm=T)+
facet_wrap(~variable,scale='free')
}
} else {
unique(dfGraph$variable)->Var
unique(dfGraph[,paste(groupVars,collapse='_')])->Grp
Lst<-list()
for(i in 1:length(Var)){
for(j in 1:length(Grp)){
subset(dfGraph,variable==Var[i]&dfGraph[,paste(groupVars,collapse='_')]==Grp[j])->dfGraphi
which(is.na(dfGraphi[,tsVar]))->ind
#dfGraphi[-ind,]->dfGraphi
# dfGraphi$cap<-Cap
# dfGraphi$floor<-Floor
names(dfGraphi)[which(names(dfGraphi)==tsVar)]<-'ds'
names(dfGraphi)[which(names(dfGraphi)=='value')]<-'y'
ifelse(Cap<0,max(dfGraphi$y,na.rm=T),Cap)->dfGraphi$cap
ifelse(Floor<0,min(dfGraphi$y,na.rm=T),Floor)->dfGraphi$floor
prophet(dfGraphi,
growth=Growth,
# changepoint.range=changepointRange,
yearly.seasonality = yearlyS,
daily.seasonality = dailyS,
weekly.seasonality = weeklyS)->m
future<-make_future_dataframe(m,periods=H,freq=stri_replace_last_regex(Period,'[a-z]',''))
# future$cap<-Cap
# future$floor<-Floor
ifelse(Cap<0,max(dfGraphi$y,na.rm=T),Cap)->future$cap
ifelse(Floor<0,min(dfGraphi$y,na.rm=T),Floor)->future$floor
predict(m,future)->Pred
prophet:::df_for_plotting(m,Pred)->dfGraphiNew
dfGraphiNew[,'variable']<-Var[i]
dfGraphiNew[,paste(groupVars,collapse='_')]<-Grp[j]
as.character(dfGraphiNew$variable)->dfGraphiNew$variable
Lst[[(i-1)*length(Var)+j]]<-dfGraphiNew
}
}
do.call(rbind,Lst)->dfGraphNew
names(dfGraphNew)[1]<-tsVar
if(Growth=='logistic'){
graphProphet<-ggplot(dfGraphNew,aes_string(tsVar,'y',color=paste(groupVars,collapse='_')))+
geom_point(size=.75,na.rm=T)+geom_line(aes(y=yhat),color='#0072B2',na.rm=T)+
geom_ribbon(aes(ymin=yhat_lower,ymax=yhat_upper),alpha=.2,fill='#0072B2',na.rm=T)+
geom_line(aes(y=floor),linetype='dashed',na.rm=T)+
geom_line(aes(y=cap),linetype='dashed',na.rm=T)+
facet_wrap(as.formula(paste('~',paste('variable',paste(groupVars,collapse='_'),sep='+'))),scale='free')
} else {
graphProphet<-ggplot(dfGraphNew,aes_string(tsVar,'y',color=paste(groupVars,collapse='_')))+
geom_point(size=.75,na.rm=T)+geom_line(aes(y=yhat),color='#0072B2',na.rm=T)+
geom_ribbon(aes(ymin=yhat_lower,ymax=yhat_upper),alpha=.2,fill='#0072B2',na.rm=T)+
facet_wrap(as.formula(paste('~',paste('variable',paste(groupVars,collapse='_'),sep='+'))),scale='free')
}
}
return(list(tabRes=resFinal,graphRes=graph,graphPredict=graphProphet))
}
#' medstats
#'
#' is a launcher function
#'
#' @export
fastR <- function() {
library(shiny)
runApp(system.file("app", package = "fastR"), launch.browser = TRUE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.