Nothing
#' This data contains critical values for testing at the significance level 5%
#' Data published in the original article.
#' Crit was taken from the original study (see Table2).
#' Values in Crit were recalculated by Josef Dolejs
#' for more possible steps of freedom
#' @name crit
#' @docType data
#' @author Steven G. Gilmour and recalculated by Josef Dolejs for more possible steps of freedom
#' @references \url{https://rss.onlinelibrary.wiley.com/doi/10.2307/2348411}
#' @keywords data
"crit"
#' Ilustrative data published in the original article (House prices)
#' Data with the practical meaning
#' Gilmour9p was taken from the original study (see Table1).
#' @name Gilmour9p
#' @docType data
#' @author Steven G. Gilmour
#' @references \url{https://rss.onlinelibrary.wiley.com/doi/10.2307/2348411}
#' @keywords data
"Gilmour9p"
#' T1 contains simulated data without real meaning
#' the null hypothesis is not rejected in the first test
#' it illustrates the situation if full model is final model
#' data has no real meaning
#' @name T1
#' @docType data
#' @author Josef Dolejs
#' @references Josef Dolejs
#' @keywords data
"T1"
#' T2 illustrates the situation if
#' the loop of tests is finished by the Trivial model
#' data has no practical meaning
#' @name T2
#' @docType data
#' @author Josef Dolejs
#' @references Josef Dolejs
#' @keywords data
"T2"
#' Trivial illustrates the situation if
#' the Trivial model is model_min without testing process
#' data has no real meaning
#' @name Trivial
#' @docType data
#' @author Josef Dolejs
#' @references Josef Dolejs
#' @keywords data
"Trivial"
#' special illustrative data if more than two tests
#' are done in the loop in the function final_model()
#' original Gilmour table was modified
#' data has no real meaning
#' @name Modified_Gilmour9p
#' @docType data
#' @author Josef Dolejs
#' @references Josef Dolejs
#' @keywords data
"Modified_Gilmour9p"
#' number of visitors in parks, citation:
#' Stemberk Josef, Josef Dolejs, Petra Maresova, Kamil Kuca.
#' Factors affecting the number of Visitors in National Parks
#' in the Czech Republic, Germany and Austria.
#' International Journal of Geo-Information.
#' ISPRS Int. J. Geo-Inf. 2018, 7(3), 124; doi:10.3390/ijgi7030124
#' data has real meaning
#' @name Parks5p
#' @docType data
#' @author Stemberk Josef, Josef Dolejs, Petra Maresova, Kamil Kuca
#' @references \url{https://www.mdpi.com/2220-9964/7/3/124}
#' @keywords data
"Parks5p"
#' number of patents in universities, citation:
#' Maresova Petra, Soukal Ivan, Stemberkova Ruzena, Selamat Ali
#' Perspective and Suitable Research Area in
#' Public Research-Patent Analysis of the Czech Public Universities
#' Education and Urban Society, 54(7),
#' Maresova Petra, Soukal Ivan, Stemberkova Ruzena, Selamat Ali
#' data has real meaning
#' @name Patents5p
#' @docType data
#' @author Maresova Petra, Soukal Ivan, Stemberkova Ruzena, Selamat Ali
#' @references \doi{10.1177/00131245211027362}
#' @keywords data
"Patents5p"
#' illustrative econometric data from Eurostat
#' for 5 variables in 17 countries in 2019
#' columns: Country, LifExp , HDP, Unempl, Obesity, APassangers
#' remove the first column: rownames(EU2019)= EU2019[,1]; EU2019=EU2019[,-1]
#' data has real meaning
#' @name EU2019
#' @docType data
#' @author Josef Dolejs
#' @references Josef Dolejs
#' @keywords data
"EU2019"
submodels<-function(d){
# two basic objects and the first level of degrees of freedom in testing is t = n-k-1
y=d[,1]; X=d[,2:ncol(d)]; k=ncol(d)-1; n=nrow(d); t=n-k-1 # k is number of regressors
# number of observations should be > number of regressors+3
if( n<=(k+3) ) {message("the number of observations is not higher than k+3")
message( paste("reduction of regressors should be done, n =",n,"and k+3 =",k+3))}
# the second level of degrees of freedom is r=k-q+1
# full model is labeled as "Model", set of regressors is "x1-xk" for k
regressors = paste0("x",1:k)
Model=paste0("y~")
for (i in 1:(k-1)){ Model=paste0(Model, regressors[i],"+") } # only k-1 regressors
Model=paste0(Model, regressors[k])
# the sign plus is not used after the last regressor
message( paste("Full Model: ",Model) )
# reading of data to regressors from table d
for (i in 1:k){ assign(regressors[i],d[,i+1]) }
# Results and MSE in the full model
Results = lm( as.formula(Model) )
MSE = sigma(Results)^2
# variables n, k, y, x1,x2...xk, MSE are used
# matrix Smodels contains all possible submodels
s=0 # number of all possible submodel is calculated
# firstly, number of all possible combinations is combn(seq(from=1,to=k,by=1),i)
# i is "level" and number of regressors, number of submodels in specific level "i"
# is calculated and the resulting value s is the sum over all levels "i"
for (i in 1:k){
s = s + ncol(combn(seq(from=1,to=k,by=1),i))
}
s # e.g. s= 511 in "Gilmour9p.txt" for k=9
# matrix of submodels and string expressions of submodels are created
m=0 # m is row in matrix of submodels, it is in the first column
Smodels = matrix(,ncol=4,nrow=s) # implementation of matrix of submodels (now is empty)
colnames(Smodels)=c("Submodel number","Number of regressors+1","Cp","Submodel")
# the trivial model (constant C1, p=1) is calculated
# function var() calculates sample variation ((n-1) is used)
m=m+1; p=1 # here row m=1 (the first row contains the trivial model "C")
C1=var(y)*(n-1)/MSE-(n-2*p)-2*(k-p+1)/(n-k-3) # formula the original article
# string expression of submodel is in the 4th column and variable "TT" is used
Smodels[m,1]=toString(m)
Smodels[m,2]=toString(p)
Smodels[m,3]=toString(C1)
Smodels[m,4]=toString("C") # the end of the trivial model
# loop through all levels of submodels: p = 1 : k
# in the full model the level is p = k+1, therefore p in the last submodel is k here
for (p in 2:(k+1-1)) { # p is level (size) of submodel (number of predictros + 1)
# level p is fixed and all combinations at the level are calculated
# all levels without the trivial model, number of combinations is p-1
Comb_p = combn(seq(from=1,to=k,by=1),p-1) # e.g. for p=2 only one regressor
# immersed loop for fixed p
# the main part creating a textual representation of the submodel
# and its calculation using the lm() function
for (i in 1:ncol(Comb_p)){ # i is number of submodels in subset for level p
subM=matrix(, ncol=p-1,nrow=ncol(Comb_p)) # matrix of regressors, not submodels
for(j in 1:p-1) {subM[i,j]= sprintf("x%s", Comb_p[j,i])} # j is for regressors
# string expressions of submodels TT is in two alternatives: p>2 or p==2
TT="y~"
if(p>2){for (j in 1:(p-2)){TT=paste0(TT, subM[i,j],"+")}
TT=paste0(TT,subM[i,p-1])}
if(p==2) {TT=paste0(TT,subM[i,p-1])}
sublm = lm(as.formula(TT)) # linear regression calculated in submodel using lm()
Cp=(sum(resid(sublm)^2)/MSE-n+2*p)-(2*(k-p+1)/(n-k-3)) # Cp
# 4 values in 4 columns in matrix Smodels are written to m row
# the four values are: number of submodel m, level p, Cp, textual representation of the submodel
m=m+1
Smodels[m,1]=toString(m); Smodels[m,2]=toString(p)
Smodels[m,3]=toString(Cp); Smodels[m,4]=toString(TT)
} # i is number of submodels in subset for level p
} # p is level (size) of submodel (number of predictros + 1)
message(dim(Smodels))
message(Smodels) # the output of Smodels
# selection of candidate submodel with the minimal value of Cp
min=which.min(as.numeric(Smodels[,3]))
# minimal Cp = Cq+1 (labeling according to the original article)
Cq_plus_1 = min(as.numeric(Smodels[,3])) # e.g. p=3 in "y~x1+x2" and q=2 in the original example
q=as.numeric(Smodels[min,2])-1 # number of regressors in model_min
message(paste("Number of regressors in model_min ",q))
model_min=Smodels[min,4]
if(q>0){ model_min_results= lm(as.formula(model_min)); model_minA=paste( model_min, " + constant") }
if(q==0){ model_minA ="Trivial model" ; model_min_results= NA ; Cq_plus_1=C1 }
message(model_minA)
message(paste("model_min: ", model_minA))
message(paste("Cq+1: ", Cq_plus_1))
return( list(y=y,X=X,n=n,k=k,t=t,regressors=regressors, full_model= paste(Model," + constant"),
full_model_results= Results, MSE = MSE, submodels_number=s, submodels=Smodels,
Cq_plus_1A= Cq_plus_1, q=q, model_min=model_minA, model_min_results= model_min_results) )
}
final_model<-function(d){
# two basic objects and the first level of degrees of freedom in testing is t = n-k-1
y=d[,1]; X=d[,2:ncol(d)]; k=ncol(d)-1; n=nrow(d); t=n-k-1 # k is number of regressors
# (n-k-3)> 0 => number of observations should be > number of regressors+3
if( n<=(k+3) ) {message("the number of observations is not higher than k+3")
message( paste("reduction of regressors should be done, n =",n,"and k+3 =",k+3))}
# the second level of degrees of freedom is r=k-q+1
# full model is labeled as "Model", set of regressors is "x1-xk" for k
regressors = paste0("x",1:k)
Model=paste0("y~")
for (i in 1:(k-1)){ Model=paste0(Model, regressors[i],"+") } # only k-1 regressors
Model=paste0(Model, regressors[k]) # the sign plus is not used after the last regressor
# reading of data to regressors from table d
for (i in 1:k){ assign(regressors[i],d[,i+1]) }
# the full model Results, MSE
full_model_results = lm( as.formula(Model) ); MSE = sigma(full_model_results)^2
# variables n, k, y, x1,x2...xk, MSE are used
# matrix Smodels contains all possible submodels
# sufficient number of observations n should be available (n>k+3)
s=0 # number of all possible submodel
# firstly, number of all possible combinations is combn(seq(from=1,to=k,by=1),i)
# i is "level" and number of regressors, number of submodels in specific level "i"
# is calculated and the resulting value s is the sum over all levels "i"
for (i in 1:k){
s = s + ncol(combn(seq(from=1,to=k,by=1),i))
}
s # e.g. it is 511 in "Gilmour9p" for k=9
# matrix of submodels and string expressions of submodels are created
m=0 # m is row in the matrix submodels and is in the first column
Smodels = matrix(,ncol=4,nrow=s) # implementation of matrix of submodels
colnames(Smodels)=c("Submodel number","Number of regressors+1","Cp","Submodel")
# firstly, the trivial model (constant C1, p=1) is calculated
# function var() calculates sample variation and (n-1) is used
m=m+1; p=1 # row m=1 contains the trivial model "C"
C1=var(y)*(n-1)/MSE-(n-2*p)-2*(k-p+1)/(n-k-3) # formula from the original article
# string expression of submodel is in the 4th column (variable "TT")
Smodels[m,1]=toString(m); Smodels[m,2]=toString(p)
Smodels[m,3]=toString(C1); Smodels[m,4]=toString("C") # the end of the trivial model
# loop through all levels of submodels: p = 1 : k
# in the full model p = k+1, therefore p in the last submodel is k
for (p in 2:(k+1-1)) { # p is size of submodel (number of regressors + 1)
# level p is fixed and all combinations at the level are calculated
# all levels without the trivial model, number of all combinations is p-1
Comb_p = combn(seq(from=1,to=k,by=1),p-1) # e.g. for p=2 is one regressor
# immersed loop for fixed p
# creating a textual representation of submodel
# and calculation of submodels results using the lm()
for (i in 1:ncol(Comb_p)){ # i is number of submodels in subset for level p
subM=matrix(, ncol=p-1,nrow=ncol(Comb_p)) # matrix of regressors, not submodels
for(j in 1:p-1) {subM[i,j]= sprintf("x%s", Comb_p[j,i])} # j is for regressor
# string expressions of submodels TT is in two alternatives: p>2 or p==2
TT="y~"
if(p>2){for (j in 1:(p-2)){TT=paste0(TT, subM[i,j],"+")}
TT=paste0(TT,subM[i,p-1])}
if(p==2) {TT=paste0(TT,subM[i,p-1])}
sublm = lm(as.formula(TT)) # results calculated in submodel
Cp=(sum(resid(sublm)^2)/MSE-n+2*p)-(2*(k-p+1)/(n-k-3)) # Cp
# 4 values in 4 columns in matrix Smodels are written to m row
# the four values are:
# m, level p, Cp, textual representation of the submodel
m=m+1
Smodels[m,1]=toString(m); Smodels[m,2]=toString(p)
Smodels[m,3]=toString(Cp); Smodels[m,4]=toString(TT)
} # i is number of submodels in subset for level p
} # p is level (size) of submodel (number of predictros + 1)
# selection of candidate submodel with the minimal value of Cp
min=which.min(as.numeric(Smodels[,3]))
# minimal Cp = Cq+1 (labeling according to the original article)
Cq_plus_1 = min(as.numeric(Smodels[,3])) # p=3 in "y~x1+x2" and q=2 in the original example
q=as.numeric(Smodels[min,2])-1 # number of regressors in model_min
message(paste("Number of regressors in model_min ",q))
model_minA=Smodels[min,4] ; Cq_plus_1A= Cq_plus_1 ; qA=q
if(qA>0){ model_min_results= lm(as.formula(model_minA)) }
if(qA==0){ model_minA ="Trivial model" ; model_min_results= NA ; FinalCp= Cq_plus_1A }
message(paste("model_min: ", model_minA))
message(paste("Cq+1:",Cq_plus_1A)) #
# table "crit" contains critical values at the significance level 5%
colnames(crit)=c("t=n-k-1 and r = k - q +1",1:30)
# labeling "q+1=p" and "q" is used as in the original article
# the number of regressors in specific submodel with the minimal value of Cp=Cq+1 is q and p=q+1
# e.g. in the example in the original article model_min: "y~x1+x2" q=2
# generally, the process may be finished at the level p=1
# and the trivial model may be resulting model
# model_minA is tested against the best immersed submodel
# one regressor is removed and the best immersed submodel is used in the first cycle
# if the null hypothesis is rejected then the best immersed submodel is determined as "model_min"
# then the loop continue to the next step
# if the null hypothesis is not rejected then final submodel is found...
message(paste("The starting value of q for the test of submodel is: ", qA))
model_min=model_minA; NullHypothesis=TRUE
if(qA>0){ for (level in 1:qA) { if(NullHypothesis) {
# q is number of regressors in submodel, number of possible reduced cadidates is also q
# t = n-k-1 a r = k q+1 are steps of freedom in the test (for critical values)
if(q == 1){ # if q=1 then the test of submodel with one regressor against the trivial model is calculated
r=k-qA+1
F=C1-Cq_plus_1+2*(n-k-2)/(n-k-3); Critical = crit[t-2,r+1]
if(F<Critical) { message(paste0("For q = ", q,": Ho is rejected and the submodel with one regressor is rejected. Only trivial model (constant) is assumed. C1 = ", round(C1,5)))
FinalCp=C1 ; q=0; final_model_results=NA ; NullHypothesis=FALSE }
if(F>Critical) {
message(paste0("For q = ", q ," : the end, Ho is not rejected and the resulting model is: ", model_min,
"+constant, F = ", round(F,5), " Cq+1 = ", round(Cq_plus_1,5) ))
FinalCp= Cq_plus_1 ;final_model= paste(model_min," + constant") ; final_model_results=lm(as.formula(model_min))
NullHypothesis=FALSE
}
} # if(q == 1)
if(q > 1){ # if q > 1 then matrix Qmodels of all reduced submodels is used
r=k-q+1
Xpositions=unlist(gregexpr('x', model_min))
# if Ho is rejected then new better submodel model_min is known, submodels with q-1 regressors
Qmodels=matrix(,ncol=4,nrow=q)# new matrix of Qsubmodels is used with q rows
mm=1 # the first reduced model from model_min
# the first regressor is excluded
Qmodels[mm,1]=mm; Qmodels[mm,2]=q; # writing mm row to matrix
Qmodels[mm,4]= paste0(substr(model_min,1,2),substr(model_min,Xpositions[mm+1],nchar(model_min)))
qlm = lm(as.formula(Qmodels[mm,4]))
Qmodels[mm,3]=(sum(resid(qlm)^2)/MSE-n+2*q)-(2*(k-q+1)/(n-k-3))
if (q>2){
for (i in 2:(q-1)){ # the following reduced models from model_min
mm=mm+1
Qmodels[mm,1]=mm; Qmodels[mm,2]=q;
Qmodels[mm,4]= paste0(substr(model_min,1,Xpositions[mm]-1),
substr(model_min,Xpositions[mm+1],nchar(model_min)))
qlm = lm(as.formula(Qmodels[mm,4]))
Qmodels[mm,3]=(sum(resid(qlm)^2)/MSE-n+2*q)-(2*(k-q+1)/(n-k-3))
}
}
mm=mm+1 # the last reduced model from model_min
Qmodels[mm,1]=mm; Qmodels[mm,2]=q
Qmodels[mm,4]= substr(model_min,1,Xpositions[mm]-2)
qlm = lm(as.formula(Qmodels[mm,4]))
Qmodels[mm,3]=(sum(resid(qlm)^2)/MSE-n+2*q)-(2*(k-q+1)/(n-k-3))
min=which.min(as.numeric(Qmodels[,3])) # position of the minimal Cq in reduced submodel
CCq=as.numeric(Qmodels[min,3]) # CCq is the minimal Cq in reduced models, it is tested
ModelTest= Qmodels[min,4] # the best model with the lowest Cq
F=CCq-Cq_plus_1+2*(n-k-2)/(n-k-3)
# it is now model_min, q=q-1 and the process is repeated
# until q >2 the loop for (level in 1:qA){}
r=k-q+1; Critical = crit[t-2,r+1]
if(F<Critical) {message(paste0("For q = ", q,": Ho is rejected, new submodel goes to the next test."))
model_min= ModelTest;q=q-1 ; Cq_plus_1= CCq
final_model= paste(model_min," + constant"); FinalCp= Cq_plus_1
}
if(F>Critical) { message(paste0("For q = ", q ,": the End, Ho is not rejected, the resulting model is: ",
model_min,"+const. F= ", round(F,5), " Cq+1 = ", round(Cq_plus_1,5) ))
final_model= paste(model_min," + constant"); FinalCp= Cq_plus_1 ; NullHypothesis=FALSE
}
} # the end of if(q>1) # if q > 1 then matrix Qmodels of all reduced submodels is used
} } } # the end of if(qA>0), the end of the loop for (level in qA:1) and if(NullHypothesis)
# all tests were done
if(q == 0){r=k-q+1; message(paste0("For q = ",
q,": the end, only constant is assumed with no regressors. C1 = ", round(C1,5) ))
final_model="Trivial model"
message("Trivial model is not tested.")
# The final values of testing process
return( list(y=y,X=X,n=n, k=k, t=t, full_model=Model, full_model_results = full_model_results, MSE = MSE,
submodels_number=s, submodels=Smodels, model_min= model_minA, model_min_results = model_min_results ,
p=1, Cq_plus_1A= Cq_plus_1A, FinalCp= FinalCp,
final_model= "Trivial model", final_model_results= NA) )
} # trivial model is not tested (no submodel exists)
message(paste("Final value of q is : ",q))
if(q > 0){
final_model= paste(model_min," + constant")
final_model_results=lm(as.formula(model_min))
return( list(y=y,X=X,n=n, k=k, t=t, full_model=Model, full_model_results = full_model_results, MSE = MSE,
submodels_number=s, submodels=Smodels, model_min= model_minA, model_min_results = model_min_results ,
p=qA, Cq_plus_1A= Cq_plus_1A, FinalCp= FinalCp,
final_model= final_model, final_model_results= final_model_results) )
}
} # the end of final_model=function(d)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.