#' Linear Composite Estimator from overlap and non overlapping consecutive subsamples direct totals
#' @description
#' Consider a sequence of monthly samples \eqn{(S_m)_{m\in\{1,\ldots,M\}}}.
#' For each individual \eqn{k} of the sample \eqn{m}, one observes the employment status \eqn{Y_{k,m}} (A binary variable) of individual \eqn{k} at time \eqn{m}, and
#' the survey weight \eqn{w_{k,m}}.
#' The following program allows to compute recursively for \eqn{m=1,\ldots,M} the Census composite estimator of the total of \eqn{Y_{.,m}}
#' with coefficients defined recursively as follows:
#'
#'
#' For \eqn{m=1}, \eqn{\hat{t}_{Y_{.,1}}=\sum_{k\in S_1}w_{k,m}Y_{k,m}}.
#'
#' For \eqn{m\geq 2},
#' \deqn{\hat{t}_{Y_{.,m}}= \left[\begin{array}{c} \hat{t}_{Y_{.,m-1}}\\ \sum_{k\in S_{m}} w_{k,m} Y_{k,m}\\\sum_{k\in S_{m-1}\cap S_{m}} w_{k,m-1} Y_{k,m-1}\\ \sum_{k\in S_{m-1}\cap S_{m}} w_{k,m} Y_{k,m}\\\sum_{k\in S_{m}\setminus S_{m-1}} w_{k,m} Y_{k,m}\end{array}\right]^{\mathrm{T}}\times \left[\begin{array}{c}\alpha_{(-1)}\\\alpha_{0}\\\beta_{(-1)}\\\beta_0\\\gamma_0\end{array}\right]}
#'
#'
#' This function computes the estimators for given values of \eqn{\alpha,\beta,\gamma}.
#'
#' An example of use of such estimate is the Census Bureau AK estimator: it is a special case of this estimator, with the values of \eqn{\alpha,\beta,\gamma} that are given as a function of two parameters A and K:
#' \deqn{\left[\begin{array}{c}\alpha_{(-1)}\\\alpha_0\\\beta_{(-1)}\\\beta_0\\\gamma_0\end{array}\right]=\left[\begin{array}{c}K\\ 1-K\\ -4~K/3\\(4K-A)/3 \\A \end{array}\right]}
#' for more references, please refer to the function \code{CompositeRegressionEstimation::AK}.
#'
#' See ``CPS Technical Paper (2006). Design and Methodology of the Current Population Survey. Technical Report 66, U.S. Census Bureau."
#' \deqn{ \begin{array}{clll}\hat{t}_{Y_{.,m}}=&& K&\times \hat{t}_{Y_{.,m-1}}\\&+&(1-K)&\times \sum_{k\in S_{m}} w_{k,m} Y_{k,m}\\&+&(-4K/3)&\times\sum_{k\in S_{m-1}\cap S_{m}} w_{k,m-1} Y_{k,m-1}\\&+&(4K-A)/3 &\times\sum_{k\in S_{m-1}\cap S_{m}} w_{k,m} Y_{k,m}\\&+&A&\times\sum_{k\in S_{m}\setminus S_{m-1}} w_{k,m} Y_{k,m}\end{array}}
#'
#'
#' Computing the estimators recursively is not very efficient. At the end, we get a linear combinaison of month in sample estimates
#' The functions \code{AK3}, and \code{WSrg} computes the linear combination directly and more efficiently.
#'
#' For the CPS, in month \eqn{m} the overlap \eqn{S_{m-1}\cap S_{m}} correspond to the individuals in the sample \eqn{S_m} with a value of month in sample equal to 2,3,4, 6,7 or 8.
#' The overlap \eqn{S_{m-1}\cap S_{m}} correspond to the individuals in the sample \eqn{S_m} with a value of month in sample equal to 2,3,4, 6,7 or 8. as well as
#' individuals in the sample \eqn{S_{m-1}} with a value of month in sample equal to 1,2,3, 5,6 or 7.
#' When parametrising the function, the choice would be \code{group_1=c(1:3,5:7)} and \code{group0=c(2:4,6:8)}.
#' @param list.tables a list of tables
#' @param w a character string: name of the weights variable (should be the same in all tables)
#' @param list.y a vector of variable names
#' @param id a character string: name of the identifier variable (should be the same in all tables)
#' @param groupvar a character string: name of the rotation group variable (should be the same in all tables)
#' @param groups_1 a character string:
#' @param groups0 if \code{groupvar} is not null, a vector of possible values for L[[groupvar]]
#' @return
#' @seealso CompositeRegressionEstimation::AK
#' @examples
#' library(dataCPS)
#'
#' data(cps200501,cps200502,cps200503,cps200504,
#' cps200505,package="dataCPS")
#' list.tables<-list(cps200501,cps200502,cps200503,cps200504,
#' cps200505)
#' w="pwsswgt";id=c("hrhhid","pulineno");groupvar=NULL;list.y="pemlr";dft0.y=NULL;
#' groups_1=NULL;groups0=NULL;Coef=c(alpha_1=0,alpha0=1,beta_1=0,beta0=0,gamma0=0)
#' composite(list.tables,w=w,list.y="pemlr",id=id,groupvar=groupvar)
#' ##With the default choice of parameters for \code{Coef}, the composite is equal to the direct estimator: we check
#' WS(list.tables = list.tables,weight = w,list.y = list.y)
#' ## Example of use of a group variable.
#' w="pwsswgt";id=NULL;groupvar="hrmis";list.y="pemlr";dft0.y=NULL;
#' groups_1=c(1:3,5:7);groups0=c(2:4,6:8);Coef=c(alpha0=1,alpha_1=0,beta_1=0,beta0=0,gamma0=0)
#' composite(list.tables,w=w,list.y="pemlr",id=id,groupvar="hrmis")
#'
#' ## Check we get the same results with two different methods.
#' x=expand.grid(mis=paste0(1:8),k=1:10)
#' list.tables<-plyr::llply(1:5,function(m){dplyr::mutate(x,w=1,y1=mis*10^(m-1),y2=1)})
#' Y<-WSrg(list.tables,weight="w",list.y=c("y1","y2"),rg="mis",dimname1="m")
#'
#' alpha0=runif(1);
#' alpha_1=1-alpha0;
#' beta0=runif(1)
#' beta_1=runif(1)
#' gamma0=runif(1)
#' Coef=c(alpha_1=alpha_1,
#' alpha0=alpha0,
#' beta0=beta0,
#' beta_1=beta_1,
#' gamma0=gamma0)
#' W<-W.rec(months=dimnames(Y)$m,
#' groups=dimnames(Y)$mis,
#' Coef=Coef)
#' CC1<-arrayproduct::`%.%`(W,Y,list(c=character(0),n="m2",p=c("m1","rg1")),list(c=character(0),p=c("m","mis"),q="y"))
#' w="w";id=NULLgroupvar="mis";list.y=c("y1","y2");dft0.y=NULL;
#' groups_1=paste0(c(1:3,5:7));groups0=paste0(c(2:4,6:8));
#' CC2<-composite(list.tables,w=w,list.y=c("y1","y2"),groupvar="mis",groups_1=groups_1,groups0=groups0,Coef=Coef)
#' Y[,,1];CC1[,1];CC2[,1]
#' W[1,1,];W[2,1,];sum(W[2,,])
#'
composite <-
function(list.tables,
w,
list.y,
id=NULL,
groupvar=NULL,
groups_1=NULL,
groups0=NULL,
#Coef=c(Total_1=1,Total0=0,Totalinter0=0,Totalinter_1=0,Totaldiff_1=0),
Coef=c(alpha_1=0,alpha0=1,beta_1=0,beta0=0,gamma0=0),
dft0.y=NULL){
# list.tables=list(...);
#controls
if(is.null(id)&is.null(groupvar)){stop("Composite: give a value for id or for groupvar")}
#if(max(!sapply(list.tables,is.data.frame))){stop("Composite: First arguments of AK must be data frames", domain = NA)}
#if(!is.character(id)||!is.vector(id)){stop("Composite: Argument id is not a character vector", domain = NA)}
if(!is.character(w)||!is.vector(w)){stop("Composite: Argument w is not a character vector", domain = NA)}
if(!is.character(list.y)||!is.vector(list.y)){stop("Composite: argument list.y is not a character vector", domain = NA)}
for (dff in list.tables){
if (!is.null(id)){
if (max(!id %in% names(dff))){stop(gettextf("Composite: variable(s) %s are not in some dataframes",paste(id[!id %in% names(dff)],collapse=", ")), domain = NA)}}
if (!(w %in% names(dff))){stop(gettextf("Composite: weight variable %s is not in dataframe",w), domain = NA)}
if (max(!list.y %in% names(dff))){stop(gettextf("Composite: variable(s) %s are not in dataframe",paste(list.y[!list.y %in% names(dff)],collapse=", ")), domain = NA)}
if (!is.double(as.data.frame(dff)[,w])){stop(gettextf("Composite: The weight variable %s of dataframes of ... must be a numeric variable.",w), domain = NA)}}
dft.y<-dft0.y #initial total for y
#if NA, initial computation of totals of contral variables.
if(is.null(dft.y)){
dft.y<-WS(list(list.tables[[1]]),weight=w,list.y=list.y)}
listtot.y<-vector()
dfEstT<-dft.y
#Loop: for every table (every T), starting from the second table
LL<-length(list.tables)
creegroupvar<-is.null(groupvar)
if(creegroupvar){
keeps=c(w,id)}
if(!creegroupvar){
keeps=c(w,groupvar)}
for (i in 2:LL){
#WS(list.tables[i],w,list.y)
df_1f<-factorisedf(list.tables[[i-1]],list.y);
df0f<-factorisedf(list.tables[[i]] ,list.y);
df_1=data.frame((list.tables[[i-1]])[,keeps,drop=FALSE],df_1f$fdf)
df0=data.frame((list.tables[[i]]) [,keeps,drop=FALSE],df0f$fdf)
names(df_1)<-c(keeps,df_1f$nfdf)
names(df0)<-c(keeps,df0f$nfdf)
#WS(list.tables[i],w,list.y);WS(list.tables=list(df0),weight=w,df0f$nfdf);nrow(df0)
if(creegroupvar){
groupvar <- "groupvar"
groups_1<-groups0 <-1
df0<-merge(df0 ,unique("$<-"(df_1[id],groupvar,1)),by=id,all.x=TRUE)
#WS(list.tables[i],w,list.y);WS(list.tables=list(df0),weight=w,df0f$nfdf);nrow(df0)
df_1<-merge(df_1,unique("$<-"(df0[id],groupvar,1)),by=id,all.x=TRUE)}
#if(!is.null(groupvar)){keeps=c(id,w,groupvar)}
listtot.y<-union(listtot.y,union(df0f$nfdf,df_1f$nfdf))
for(y in setdiff(listtot.y,df_1f$nfdf)){df_1[y]<-0}
for(y in setdiff(listtot.y,df0f$nfdf)){df0[y]<-0}
#computation of the composite estimator
amettrea0<-function(matrix,listvar){
amettrea0<-setdiff(listvar, colnames(matrix))
zeros<-matrix(0,nrow(matrix),length(amettrea0));
colnames(zeros)<-amettrea0
return(cbind(matrix,zeros))}
dft.y <-amettrea0(dft.y,listtot.y)
dfEstT<-amettrea0(dfEstT,listtot.y)
df_1 <-amettrea0(df_1,listtot.y)
df0 <-amettrea0(df0,listtot.y)
#TotEstimate_1 <-as.matrix(WS(list(df_1) ,weight=w,list.y=listtot.y)[listtot.y])
TotEstimate0 <-WS(list(df0) ,weight=w,list.y=listtot.y)[,listtot.y]
inter0<-is.element(df0[[groupvar]],groups0)
inter_1<-is.element(df_1[[groupvar]],groups_1)
TotEstimateIntert0<-WS(list(df0[inter0,]),weight=w,list.y=listtot.y)[,listtot.y]
TotEstimateIntert_1<-WS(list(df_1[inter_1,]),weight=w,list.y=listtot.y)[,listtot.y]
TotEstimateDiff0 <-WS(list(df0[!inter0,]),weight=w,list.y=listtot.y)[,listtot.y]
dft.y<-t(cbind(dft.y[,listtot.y],TotEstimate0,TotEstimateIntert_1,TotEstimateIntert0,TotEstimateDiff0)%*%
Coef[c("alpha_1","alpha0","beta_1","beta0","gamma0")])
colnames(dft.y)<-listtot.y
dfEstT<-rbind(dfEstT[,listtot.y,drop=FALSE],dft.y[,listtot.y,drop=FALSE])
}
rownames(dfEstT)<-names(list.tables)
return(dfEstT)}
#' general month in sample estimates weights for recursive linear combinaison of mis estimates
#'
#' @param months an integer, indicating number of months
#' @param nmonth an integer, indicating number of months
#' @param ngroup a vector of character strings or numeric string
#' @param groups a vector of character strings or numeric string
#' @param Coef a named vector of 5 numeric value
#' @param S a vector of integers indicating the indices of the rotation group in the sample that overlap with the previous sample: groups[S] are the overlapping rotation groups
#' @param S_1 a vector of integers indicating the indices of the corresponding rotation group of S in the previous month
#' @return an array of AK coefficients W[m2,m1,mis1] such that Ak estimate for month m2 is sum(W[y2,,])*Y) where Y[m1,mis1] is direct estimate on mis mis1 for emp stat y1 at month m1.
#' @examples
#' alpha0=runif(1);
#' alpha_1=1-alpha0;
#' beta0=runif(1)
#' beta_1=runif(1)
#' gamma0=runif(1)
#' W<-W.rec(months=1:3,
#' groups=1:8,
#' Coef=c(alpha_1=alpha_1,
#' alpha0=alpha0,
#' beta_1=beta_1,
#' beta0=beta0,
#' gamma0=gamma0))
#' dimnames(W)
#' if(all(W[1,1,]==1)){"this part is fine"}else{"there is a problem"}
#' m<-sample(2:3,1)
#' if(all(abs(W[m,m,c(1,5)]-(alpha0+gamma0))<1e-10)){"this part is fine"}else{"there is a problem"}
#' if(all(abs(W[m,m,c(2:4,6:8)]-(alpha0+beta0))<1e-10)){"this part is fine"}else{"there is a problem"}
#' if(all(abs(W[m,m-1,c(1:3,5:7)]-(alpha_1*W[m-1,m-1,c(1:3,5:7)]+beta_1))<1e-10)){"this part is fine"}else{"there is a problem"}
#' if(all(abs(W[m,m-1,c(4,8)]-(alpha_1*W[m-1,m-1,c(4,8)]))<1e-10)){"this part is fine"}else{"there is a problem"}
#' x=expand.grid(mis=paste0(1:8),k=1:10)
#' list.tables<-plyr::llply(1:5,function(m){dplyr::mutate(x,w=1,y1=strtoi(mis)*10^(m-1),y2=1)})
#' Y<-WSrg(list.tables,weight="w",list.y=c("y1","y2"),rg="mis",dimname1="m")
#'
#' xx<-function(Coef){
#' W=W.rec(months = dimnames(Y)[["m"]],
#' groups = dimnames(Y)[["mis"]],
#' S=c(2:4,6:8),Coef=Coef)
#' #plyr::aaply(1:dim(W)[1],1,function(i){plyr::aaply(W[i,,],1,sum)})
#' CC1<-arrayproduct::`%.%`(W,Y,list(c=character(0),n="m2",p=c("m1","rg1")),list(c=character(0),p=c("m","mis"),q="y"))
#' w="w";id=NULL;groupvar="mis";list.y=c("y1","y2");dft0.y=NULL;
#' groups_1=paste0(c(1:3,5:7));groups0=paste0(c(2:4,6:8));
#' CC2<-composite(list.tables,w=w,list.y=c("y1","y2"),id=id,groupvar=groupvar,groups_1=groups_1,groups0=groups0,Coef=Coef)
#' a=sample(5,1)
#' c(w=CC1[a,1],r=CC2[a,1])}
#' xx(c(alpha_1=0,alpha0=1,beta_1=0,beta0=0,gamma0=0))
#' xx(Coef=c(alpha_1=runif(1),alpha0=runif(1),beta_1=runif(1),beta0=runif(1),gamma0=runif(1)))
W.rec<-function(months,
groups,
S=c(2:4,6:8),
S_1=S-1,
Coef=c(alpha_1=0,alpha0=1,beta_1=0,beta0=0,gamma0=0)){
nmonth<-length(months)
ngroup<-length(groups)
W<-array(0,c(nmonth,nmonth,ngroup))
dimnames(W)<-list(m2=months,m1=months,rg1=groups)
Hmisc::label(W)<-"Coefficient matrix W[m2,m1,mis1] such that Ak estimate for month m2 is sum(W[y2,,])*Y) where Y[m1,mis1] is direct estimate on mis mis1 for emp stat y1 at month m1"
Sbar<-setdiff(1:ngroup,S)
W[1,1,]<-1
for(i in 2:nmonth){
W[i,,]<-W[(i-1),,]*Coef["alpha_1"]
W[i,i,]<-Coef["alpha0"]
W[i,i-1,S_1]<-W[i,i-1,S_1]+Coef["beta_1"]
W[i,i,S]<-W[i,i,S]+Coef["beta0"]
W[i,i,Sbar]<-W[i,i,Sbar]+Coef["gamma0"]}
return(W)}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.