library(mipfp)
library(stringr)
library(dplyr)
library(truncdist)
source("R/cnbinom.pars_function.R")
source("R/seedmatrix_function.R")
source("R/reweight.contingencytable.R")
source("R/reweight.univariatetable.R")
source("R/format_data_for_functions.R")
rec<-function (X = NULL, Y = NULL, Xlowerbound, Xupperbound, Ylowerbound, Yupperbound,
seed.matrix = NULL, seed.estimation.method = NULL) {
# create row and column ranges
rowrange<-Xlowerbound: Xupperbound
colrange<-Ylowerbound: Yupperbound
#first have to check if X and Y are tables or averages because this effects how mu is found
#case where 2 averages
if (length(X)==1 && length(Y)==1) {
rowmu=X
colmu=Y
# truncated Poisson
rowprob<-dtrunc(rowrange,lambda =rowmu, spec = "pois", a = Xlowerbound - 1, b = Xupperbound)
colprob<-dtrunc(colrange,lambda =colmu, spec = "pois", a = Ylowerbound - 1, b = Yupperbound)
# updating users about what is happening
print("Truncated Poisson distributions were calculated for both the X and Y variable (i.e. Var(X) = E(X) and Var(Y) = E(Y) has to be assumed when only averages are provided)")
} #end of case where 2 averages
#case where 2 tables
if (length(X)>1 && length(Y)>1) {
roww=cnbinom.pars(X)
rowmu=roww$Average
rowr = roww$Dispersion
coll=cnbinom.pars(Y)
colmu=coll$Average
colr = coll$Dispersion
#Negative binomial
rowprob<-dtrunc(rowrange, size =rowr, mu = rowmu, spec = "nbinom", a = Xlowerbound - 1, b = Xupperbound)
colprob<-dtrunc(colrange,size = colr, mu = colmu, spec = "nbinom", a = Ylowerbound - 1, b = Yupperbound)
# updating users about what is happening
print("Truncated negative binomial distributions between were calculated for both the X and Y variable.")
# reweighting tables
#names
names(rowprob)<-Xlowerbound:Xupperbound
names(colprob)<-Ylowerbound:Yupperbound
rowprob<-reweight.univariatetable(observed.table = X, estimated.table = rowprob)
colprob<-reweight.univariatetable(observed.table = Y, estimated.table = colprob)
} #end of case where 2 tables
#case where X=table and Y=average
if (length(X)>1 && length(Y)==1) {
#find mu from censored table
roww=cnbinom.pars(X)
rowmu=roww$Average
rowr = roww$Dispersion
#average represent mu for Y
colmu=Y
#Negative binomial
rowprob<-dtrunc(rowrange, size =rowr, mu = rowmu, spec = "nbinom", a = Xlowerbound - 1, b = Xupperbound)
#Poisson
colprob<-dtrunc(colrange,lambda =colmu, spec = "pois", a = Ylowerbound - 1, b = Yupperbound)
# updating users about what is happening
print("A negative binomial distribution was calculated for the X variable and a Poisson distribution was calculated for the Y variable (i.e. Var(Y) = E(Y) has to be assumed when only an average is provided).")
# reweighting tables
#names
names(rowprob)<-Xlowerbound:Xupperbound
names(colprob)<-Ylowerbound:Yupperbound
rowprob<-reweight.univariatetable(observed.table = X, estimated.table = rowprob)
} #end of case where X=table and Y=average
#case where X=average and Y=table
if (length(X)==1 && length(Y)>1) {
#averages represent mu for X
rowmu=X
coll=cnbinom.pars(Y)
colmu=coll$Average
colr = coll$Dispersion
#Poisson
rowprob<-dtrunc(rowrange,lambda =rowmu, spec = "pois", a = Xlowerbound - 1, b = Xupperbound)
#Negative binomial
colprob<-dtrunc(colrange,size = colr, mu = colmu, spec = "nbinom", a = Ylowerbound - 1, b = Yupperbound)
# updating users about what is happening
print("A truncated Poisson distribution was calculated for the X variable (i.e. Var(X) = E(X) has to be assumed when only an average is provided) and a truncated negative binomial distribution was calculated for Y.")
# reweighting tables
#names
names(rowprob)<-Xlowerbound:Xupperbound
names(colprob)<-Ylowerbound:Yupperbound
colprob<-reweight.univariatetable(observed.table = Y, estimated.table = colprob)
} #end of case where X=average and Y=table
# case where censored contingency table = Y
if (is.null(X) && length(Y)>2){
x<-row.marginal(Y)
roww=cnbinom.pars(x)
rowmu=roww$Average
rowr = roww$Dispersion
y<-column.marginal(Y)
coll=cnbinom.pars(y)
colmu=coll$Average
colr = coll$Dispersion
# error if table is weird
sumx<-sum(x$`Marginal Frequencies`)
sumy<-sum(y$`Marginal Frequencies`)
# removing marginals to get to inside of table
# br = bottom row, tr = top row, rc = right column, and lc = left column
brgone<-Y[-nrow(Y),]
# for removing the top row we have to also accounte for if user read in csv with header=TRUE or header=FALSE
trgone=unname(brgone)
rcgone<-trgone[, -ncol(trgone)]
lcgone<-rcgone[,-1]
Inside<-matrix(as.matrix(lcgone), dim(data.frame(lcgone))*dim(data.frame(lcgone))[2], 1)
#r emoving commas from inside of table
inside<-str_replace_all(Inside,",","")
suminside<-sum(as.numeric(inside))
if(isFALSE(all.equal(sumx,sumy)) ) stop ('Margins are not equal.')
if(isFALSE(all.equal(sumx,suminside))) stop ('Row margins are not equal to inside of table.')
if(isFALSE(all.equal(sumy,suminside))) stop ('Column margins are not equal to inside of table.')
#Negative binomial
rowprob<-dtrunc(rowrange, size =rowr, mu = rowmu, spec = "nbinom", a = Xlowerbound - 1, b = Xupperbound)
colprob<-dtrunc(colrange, size = colr, mu = colmu, spec = "nbinom", a = Ylowerbound - 1, b = Yupperbound)
# updating users about what is happening
print("Truncated negative binomial distributions were calculated for both the row X and column Y.")
}
# case where censored contingency table = X
if (is.null(Y) && length(X)>2){
x<-row.marginal(X)
roww=cnbinom.pars(x)
rowmu=roww$Average
rowr = roww$Dispersion
y<-column.marginal(X)
coll=cnbinom.pars(y)
colmu=coll$Average
colr = coll$Dispersion
# error if table is weird
sumx<-sum(x$`Marginal Frequencies`)
sumy<-sum(y$`Marginal Frequencies`)
# removing marginals to get to inside of table
# br = bottom row, tr = top row, rc = right column, and lc = left column
brgone<-X[-nrow(X),]
# for removing the top row we have to also accounte for if user read in csv with header=TRUE or header=FALSE
trgone=unname(brgone)
rcgone<-trgone[, -ncol(trgone)]
lcgone<-rcgone[,-1]
Inside<-matrix(as.matrix(lcgone), dim(data.frame(lcgone))*dim(data.frame(lcgone))[2], 1)
#r emoving commas from inside of table
inside<-str_replace_all(Inside,",","")
suminside<-sum(as.numeric(inside))
if(isFALSE(all.equal(sumx,sumy)) ) stop ('Margins are not equal.')
if(isFALSE(all.equal(sumx,suminside))) stop ('Row margins are not equal to inside of table.')
if(isFALSE(all.equal(sumy,suminside))) stop ('Column margins are not equal to inside of table.')
#Negative binomial (different ways to calculate it.. )
rowprob<-dtrunc(rowrange, mu = rowmu, size =rowr, spec = "nbinom", a = Xlowerbound - 1, b = Xupperbound)
colprob<-dtrunc(colrange, mu = colmu, size = colr, spec = "nbinom", a = Ylowerbound - 1, b = Yupperbound)
# updating users about what is happening
print("Truncated negative binomial distributions were calculated for both row X and column Y.")
}
# error if averages/bounds are incorrect
if (rowmu <= Xlowerbound) stop ('Check X (row) inputs. X average <= Xlowerbound.')
if (colmu <= Ylowerbound) stop ('Check Y (column) inputs. Y average <= Ylowerbound.')
# error if rowmu/colmu > bounds
if (rowmu >= Xupperbound) stop ('Check X (row) inputs. X average >= Xupperbound.')
if (colmu >= Yupperbound) stop ('Check Y (column) inputs. Y average >= Yupperbound.')
# both marginals == 1
rowc_prob<-rowprob
colc_prob<-colprob
# print(sum(rowc_prob))
# print(sum(colc_prob))
# generating an intial a table to be updated.. different cases ...
# decoupled case no seed provided
if (!is.null(X) && !is.null(Y) && is.null(seed.matrix)) {
seedfinal <- array(1,dim=c(length(rowc_prob),c(length(colc_prob))))
# make probabilities
seedfinal <- seedfinal/sum(seedfinal)
print("The default seed matrix implies independence between variables 1/(length(Xlowerbound:Xupperbound)*length(Ylowerbound:Yupperbound)) (i.e. independence between variables has to be assumed when there is no external information about the joint distribution)")}
# decoupled case with seed provided
if (!is.null(X) && !is.null(Y) && !is.null(seed.matrix)) {seedfinal=seed.matrix}
# case where censored contingency table = X and no seed is provided
if (is.null(Y) && length(X)>2 && is.null(seed.matrix)){
seedfinal<- seedmatrix(X, Xlowerbound, Xupperbound, Ylowerbound, Yupperbound)$Probabilities
print ("The default seed matrix is the output from seedmatrix()$Probabilities: see seedmatrix() function for more information")}
# case where censored contingency table = Y and no seed is provided
if (is.null(X) && length(Y)>2 && is.null(seed.matrix)){
seedfinal<- seedmatrix(Y, Xlowerbound, Xupperbound, Ylowerbound, Yupperbound)$Probabilities
print ("The default seed matrix is the output from seedmatrix()$Probabilities: see seedmatrix() function for more information")}
# case where censored contingency table = X and seed is provided
if (is.null(Y) && length(X)>2 && !is.null(seed.matrix)){seedfinal=seed.matrix}
# case where censored contingency table = Y and seed is provided
if (is.null(X) && length(Y)>2 && !is.null(seed.matrix)){seedfinal=seed.matrix}
if( isFALSE(all.equal(sum(seedfinal), 1))) {
seedfinal<-seed.matrix/sum(seed.matrix)
print("seed.matrix was turned to probabilites: seed.matrix/sum(seed.matrix)")
}
# store the margins in a list
tgt.data <- list(rowc_prob, colc_prob)
# list of dimensions of each marginal constrain
tgt.list <- list(1,2)
# calling the estimate function (seed.estimation.method = ipfp, ml, chi2, lsq)
if (is.null(seed.estimation.method)) {seedestimationmethodfinal <- c("ipfp")} else{
seedestimationmethodfinal=c(seed.estimation.method)}
# print seed and method using
print(paste("You are using the", seedestimationmethodfinal, "method to updated the seed."))
#mipfp R package
final<- Estimate(seedfinal, tgt.list, tgt.data, method = seedestimationmethodfinal)
# check to see if seed method could converge and give error if not
if (exists("final")==FALSE) {stop ('Estimate() function in mipfp R package cannot complete convergence.')}
#cell estimated values
finalx<-data.frame(final$x.hat)
row.names(finalx)<-Xlowerbound:Xupperbound
names(finalx)<-Ylowerbound:Yupperbound
#cell probabilities values
# finalp<-data.frame(final$x.hat)
# start reweighting contingency ###########################
# Y is contingency table
if (is.null(X)){
finalouput<-reweight.contingencytable(observed.table = Y, estimated.table = finalx)
}
# X is contingency table
if (is.null(Y)){
finalouput<-reweight.contingencytable(observed.table = X, estimated.table = finalx)
}
# decouled cases
if (!is.null(Y) && !is.null(X)) {
finalouput<-finalx
}
# end reweighting ###########################
# negative binomal dispersion parameter is not always used.
if (exists("rowr")==FALSE) {rowr=NA}
if (exists("colr")==FALSE) {colr=NA}
return(list("Probability.Estimates"=finalouput, "RowX.Average" = rowmu, "ColumnY.Average" = colmu,
"RowX.Dispersion" = rowr, "ColumnY.Dispersion" = colr))
} #end of rec
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.