fitFSD: Fitting firm size-age distributions

fit.FSDR Documentation

Fitting firm size-age distributions

Description

To fit firm size and/or age distributions based on binned data.

Usage

fit.FSD(x,breaks,dist)

Arguments

x

'x' can be a vector or a matrix, or a 'histogram'.

breaks

a matrix with two columns if 'x' is a matrix. Otherwise, it is a vector. Can be missing if 'x' is a vector and 'x' will be grouped uisng the default parameters with hist.

dist

distribution type for 'x' and/or 'y'. Options include Weibull, gpd – generalized Pareto distribution, pd or Pareto, EWD– exponentiated Weibull distribtion.

Value

x.fit, y.fit

Fitted marginal distribution for row- and column-data when 'x' is a matrix.

Psi

Plackett estimate of the Psi. ONLY for 'fit.FSD2'

If each of x.fit and y.fit, the following values are available:

xhist

histogram.

dist

distribution type. Options include Weibull, gpd – generalized Pareto distribution, pd or Pareto, EWD– exponentiated Weibull distribtion.

size

Total number of observations (sample size)

pars

Estimates of parameters.

y, y2, x

the pdf (y) and cdf (y2) values evaluated on a grid x

Examples

x <- rweibull(1000,2,1)
(out <- fit.FSD(x))

data(FSD)
b <- c(0,1:5,6,11,16,21,26,38,Inf);
x <- as.numeric(FirmAge[38,]);
## not run
##(out <- fit.FSD(x))#treated as raw data
xh <- binning(counts=x, breaks=b)
(out <- fit.FSD(xh))
#(out <- fit.FSD(xh,dist="ewd"))
(out <- fit.FSD(xh,dist="pd"))
(out <- fit.FSD(xh,dist="gpd"))


x <- as.numeric(FirmSize[nrow(FirmSize),])
brks.size <- c(0,4.5,9.5,19.5,49.5,99.5,249.5,499.5,
               999.5,2499.5,4999.5, 9999.5,Inf)
xh <- binning(counts=x,breaks=brks.size)
Fn <- cumsum(x)/sum(x);Fn
k <- length(Fn)
i <- c((k-5):(k-1))
out1 <- fit.GLD(xh, qtl=brks.size[i+1],
               qtl.levels=Fn[i],lbound=0)

i <- c(2,3,4,10,11)
out2 <- fit.GLD(xh, qtl=brks.size[i+1],
               qtl.levels=Fn[i],lbound=0)

i <- c(1,2,8,9,10)
out3 <- fit.GLD(xh, qtl=brks.size[i+1],
               qtl.levels=Fn[i],lbound=0)

plot(xh,xlim=c(0,120))
lines(out1, col=2)
lines(out2, col=3)
lines(out3, col=4)

ZipfPlot(xh,plot=TRUE)
lines(log(1-out1$y2)~log(out1$x), col=2)
lines(log(1-out2$y2)~log(out2$x), col=3)
lines(log(1-out3$y2)~log(out3$x), col=4)

## sample codes for the figures and tables in the PLoS ONE manuscript

## Table 1 *****************************************************
#xtable(Firm2)

## Figure 1 ****************************************************

#rm(list=ls())
#require(bda)
#data(FSD)

##postscript(file='fig1.eps',paper='letter')
#par(mfrow=c(2,2))

#tmp <- ImportFSD(FirmAge, year=2014,type="age");
#xh <- tmp$age
#plot(xh, xlab="X", main="(a) Histogram of 2014 firm age data")
#fit0 <- fit.FSD(xh, dist='exp')
#lines(fit0$x.fit$ly~fit0$x.fit$lx, lty=2)

#ZipfPlot(fit0, lty=2,
#         xlab="log(b)",ylab="log(r)",
#         main="(b) Zipf plot of firm age (2014)")

#tmp <- ImportFSD(FirmAge, year=1988,type="age");
#xh2 <- tmp$age
#fit1 <- fit.FSD(xh2, dist='exp')
#ZipfPlot(fit1, lty=2,
#         xlab="log(b)",ylab="log(r)",
#         main="(c) Zipf plot of firm age (1988)")

#ZipfPlot(fit0, lty=2, type='l',
#         xlab="log(b)",ylab="log(r)",
#         main="(d) Zipf plots of firm age (1979-2014)")
#for(year in 1979:2014){
#    tmp <- ImportFSD(FirmAge, year=year,type="age")
#    tmp2 <- ZipfPlot(tmp$age, plot=FALSE)
#    lines(tmp2)
#}

#dev.off()

## Figure 2 ****************************************************

#rm(list=ls())
#require(bda)
#data(FSD)

#postscript(file='fig2.eps',paper='letter')
#par(mfrow=c(2,2))
## plot (a)
#tmp <- ImportFSD(FirmSize, year=2014,type="size");
#yh <- tmp$size
#plot(yh, xlab="Y", main="(a) Histogram of firm size (2014)")

## plot (b)
#lbrks <- log(yh$breaks); lbrks[1] <- log(1)
#cnts <- yh$freq
#xhist2 <- binning(counts=cnts,breaks=lbrks)
#plot(xhist2,xlab="log(Y)",
#     main="(b) Histogram of firm size (2014, log-scale)")

## plot (c)
#ZipfPlot(yh, plot=TRUE,
#         main="(c) Zipf plot firm size (2014)",
#         xlab="log(r)",ylab="log(b)")

## plot (d)

#ZipfPlot(yh, plot=TRUE,type='l',
#         main="(d) Zipf plots of firm size (1977-2014)",
#         xlab="log(r)",ylab="log(b)")

#res <- NULL
#for(year in 1977:2014){
#    tmp <- ImportFSD(FirmSize,year=year,type="size");
#    yh0 <- tmp$size
#    zipf1 <- ZipfPlot(yh0,plot.new=FALSE)
#    lines(zipf1)
#    res <- c(res, zipf1$slope)
#}

#dev.off()

#mean(res); sd(res)
#quantile(res, prob=c(0.025,0.097))


## Figure 3a & 3b ****************************************************
#rm(list=ls())
#require(bda)
#data(FSD)

#postscript(file='fig3a.eps',paper='letter')

tmp <- ImportFSD(FirmAge, year=2014,type="age");
xh <- tmp$age
(fit1 <- fit.FSD(xh, dist='exp'));
(fit2 <- fit.FSD(xh, dist='weibull'));
#(fit3 <- fit.FSD(xh, dist='ewd')); #slow
#(fit0 <- fit.FSD(xh, dist='gpd')); # test, not used
(fit4 <- fit.FSD(xh, dist='gld'));

#plot(xh, xlab="X", main="(a) Fitted Distribtions")
#lines(fit1, lty=1, col=1,lwd=3)
#lines(fit2, lty=2, col=1,lwd=3)
#lines(fit3, lty=3, col=1,lwd=3)
#lines(fit4, lty=4, col=1,lwd=3)
#lines(fit0, lty=4, col=2,lwd=3)

legend("topright",cex=2,
       legend=c("EXP","Weibull","EWD","GLD"),
       lty=c(1:4),lwd=rep(3,4),col=rep(1,4))
dev.off()

## Table 3
#r2 <- c(fit1$x.fit$Dn.Zipf, fit2$x.fit$Dn.Zipf,
#        fit3$x.fit$Dn.Zipf, fit4$x.fit$Dn.Zipf)
#dn <- c(fit1$x.fit$Dn, fit2$x.fit$Dn,
#        fit3$x.fit$Dn, fit4$x.fit$Dn)
#aic <- c(fit1$x.fit$AIC, fit2$x.fit$AIC,
#         fit3$x.fit$AIC, fit4$x.fit$AIC)
#bic <- c(fit1$x.fit$BIC, fit2$x.fit$BIC,
#         fit3$x.fit$BIC, fit4$x.fit$BIC)
#aicc <- c(fit1$x.fit$AICc, fit2$x.fit$AICc,
#          fit3$x.fit$AICc, fit4$x.fit$AICc)
#tbl3 <- data.frame(
#    Dn.Zipf = r2, Dn=dn, AIC=aic, BIC=bic,AICc=aicc)
#rownames(tbl3) <- c("EXP","WD","EWD","GLD")
##save(tbl3, file='tbl3.Rdata')
#tbl3
#require(xtable)
#xtable(tbl3,digits=c(0,4,4,0,0,0))


#postscript(file='fig3b.eps',paper='letter')
#par(mfrow=c(1,1))
#ZipfPlot(fit1, plot.new=TRUE,col=1,lwd=3,lty=1,
#         xlab="log(x)",ylab="log(S(x))",
#         main="(b) Zipf Plots of Fitted Distribtions")
#ZipfPlot(fit2, plot.new=FALSE,col=1,lty=2,lwd=3)
#ZipfPlot(fit3, plot.new=FALSE,col=1,lty=3,lwd=3)
#ZipfPlot(fit4, plot.new=FALSE,col=1,lty=4,lwd=3)

#legend("bottomleft",cex=2,
#       legend=c("EXP","Weibull","EWD","GLD"),
#       lty=c(1:4),lwd=rep(3,4),col=rep(1,4))
#dev.off()

## TABLE 4 ############################################
## More Results **************************************

#mysummary <- function(x,dist){
#    .winner <- function(x,DIST=dist){
#        isele <- which(x==min(x))
#        if(length(isele)>1)
#            warning("multiple winners, only the first is used")
#        DIST[isele[1]]
#    }
#    .mysum <- function(x,y) sum(y==x)
#    mu1 <- tapply(x$Dn.Zipf,x$Dist, mean, na.rm=TRUE)
#    mu2 <- tapply(x$Dn,x$Dist, mean, na.rm=TRUE)
#    sd1 <- tapply(x$Dn.Zipf,x$Dist, sd, na.rm=TRUE)
#    sd2 <- tapply(x$Dn,x$Dist, sd, na.rm=TRUE)
#    win1 <- tapply(x$Dn.Zipf,x$Year, .winner,DIST=dist)
#    win2 <- tapply(x$Dn,x$Year, .winner,DIST=dist)
#    win3 <- tapply(x$BIC,x$Year, .winner,DIST=dist)
#    dist0 <- levels(as.factor(x$Dist))
    
#    out1 <- sapply(dist0, .mysum,y=win1)
#    out2 <- sapply(dist0, .mysum,y=win2)
#    out3 <- sapply(dist0, .mysum,y=win3)
#    #sele1 <- match(names(out1), dist0)
#    #sele2 <- match(names(out2), dist0)
#    #sele3 <- match(names(out3), dist0)
    
#    out <- data.frame(Mean.Dn.Zipf=mu1, SD.Dn.Zipf=sd1,
#                      Mean.Dn=mu2, SD.Dn=sd2,
#                      Win.Zipf=out1,
#                      Win.Dn=out2,
#                      Win.BIC=out3)
#    out
#}

#require(bda) #version 14.3.11+
#data(FSD)

#Dn.Zipf <- NULL
#Dn <- NULL
#BIC <- NULL
#Year <- NULL
#DIST <- NULL; dist0 <- c("EXP","WD","EWD","GLD")
#for(year in 1983:2014){
#    tmp <- ImportFSD(FirmAge, year=year,type="age");
#    xh <- tmp$age
#    fit1 <- fit.FSD(xh, dist='exp');fit1
#    DIST <- c(DIST,"EXP")
#    Year <- c(Year, year)
#    Dn.Zipf <- c(Dn.Zipf, fit1$x.fit$Dn.Zipf)
#    Dn <- c(Dn, fit1$x.fit$Dn)
#    BIC <- c(BIC, fit1$x.fit$BIC)
#    fit1 <- fit.FSD(xh, dist='weibull');fit1
#    DIST <- c(DIST,"WD")
#    Year <- c(Year, year)
#    Dn.Zipf <- c(Dn.Zipf, fit1$x.fit$Dn.Zipf)
#    Dn <- c(Dn, fit1$x.fit$Dn)
#    BIC <- c(BIC, fit1$x.fit$BIC)
#    fit1 <- fit.FSD(xh, dist='ewd');fit1
#    DIST <- c(DIST,"EWD")
#    Year <- c(Year, year)
#    Dn.Zipf <- c(Dn.Zipf, fit1$x.fit$Dn.Zipf)
#    Dn <- c(Dn, fit1$x.fit$Dn)
#    BIC <- c(BIC, fit1$x.fit$BIC)
#    fit1 <- fit.FSD(xh, dist='gld');fit1
#    DIST <- c(DIST,"GLD")
#    Year <- c(Year, year)
#    Dn.Zipf <- c(Dn.Zipf, fit1$x.fit$Dn.Zipf)
#    Dn <- c(Dn, fit1$x.fit$Dn)
#    BIC <- c(BIC, fit1$x.fit$BIC)
#}

#RES <- data.frame(Year=Year,Dist=DIST,Dn.Zipf=Dn.Zipf,Dn=Dn,BIC=BIC)
#save(RES, file='tbl4.Rdata')
#load(file='tbl4.Rdata')

#DIST <- c("EXP","WD","EWD","GLD")
#sele <- RES$Year>=1983 & RES$Year<=1987;sum(sele)
#(out <- mysummary(RES[sele,],dist=DIST))
#xtable(out[c(2,4,1,3),],digits=c(0,4,4,4,4,0,0,0))

#sele <- RES$Year>=1988 & RES$Year<=1992;sum(sele)
#(out <- mysummary(RES[sele,],dist=DIST))
#xtable(out[c(2,4,1,3),],digits=c(0,4,4,4,4,0,0,0))

#sele <- RES$Year>=1993 & RES$Year<=1997;sum(sele)
#(out <- mysummary(RES[sele,],dist=DIST))
#xtable(out[c(2,4,1,3),],digits=c(0,4,4,4,4,0,0,0))

#sele <- RES$Year>=1998 & RES$Year<=2002;sum(sele)
#(out <- mysummary(RES[sele,],dist=DIST))
#xtable(out[c(2,4,1,3),],digits=c(0,4,4,4,4,0,0,0))

#sele <- RES$Year>=2003 & RES$Year<=2014;sum(sele)
#(out <- mysummary(RES[sele,],dist=DIST))
#xtable(out[c(2,4,1,3),],digits=c(0,4,4,4,4,0,0,0))

#(out <- mysummary(RES,dist=DIST))
#xtable(out[c(2,4,1,3),],digits=c(0,4,4,4,4,0,0,0))

## Figure 4a & 4b ******************************************
#rm(list=ls())
#require(bda)
#data(FSD)


tmp <- ImportFSD(FirmSize, year=2014,type="size");
yh <- tmp$size

(fit1 <- fit.FSD(yh, dist='lognormal'));
(fit2 <- fit.FSD(yh, dist='pareto'));
(fit3 <- fit.FSD(yh, dist='gpd'));
(fit4 <- fit.FSD(yh, dist='gld'));

#postscript(file='fig4a.eps',paper='letter')
#par(mfrow=c(1,1))
#plot(yh, xlab="Y", main="(a) Fitted Distributions",
#     xlim=c(0,50))
#lines(fit1, lty=1, col=1,lwd=3)
#lines(fit2, lty=2, col=1,lwd=3)
#lines(fit3, lty=3, col=1,lwd=3)
#lines(fit4, lty=4, col=1,lwd=3)


#legend("topright",cex=2,
#       legend=c("LN","PD","GPD","GLD"),
#       lty=c(1:4),lwd=rep(3,4),col=rep(1,4))
#dev.off()

#postscript(file='fig4b.eps',paper='letter')
#par(mfrow=c(1,1))
#ZipfPlot(fit1, plot.new=TRUE,col=1,lwd=3,lty=1,
#         xlab="log(y)",ylab="log(S(y))",
#         main="(b) Zipf Plots of Fitted Distribtions")
#ZipfPlot(fit2, plot.new=FALSE,col=1,lty=2,lwd=3)
#ZipfPlot(fit3, plot.new=FALSE,col=1,lty=3,lwd=3)
#ZipfPlot(fit4, plot.new=FALSE,col=1,lty=4,lwd=3)

#legend("bottomleft",cex=2,
#       legend=c("LN","PD","GPD","GLD"),
#       lty=c(1:4),lwd=rep(3,4),col=rep(1,4))
#dev.off()

## TABLE 5 ############################################
## More information about firm size


#Dn.Zipf <- NULL
#Dn <- NULL
#BIC <- NULL
#Year <- NULL
#DIST <- NULL;

#dist0 <- c("LN","PD","GPD","GLD")
#for(year in 1977:2014){
#    DIST <- c(DIST,dist0)
#    Year <- c(Year, rep(year,length(dist0)))
#    tmp <- ImportFSD(FirmSize, year=year,type="size");
#    xh <- tmp$size
#    fit1 <- fit.FSD(xh, dist='lognormal');
#    Dn.Zipf <- c(Dn.Zipf, fit1$x.fit$Dn.Zipf)
#    Dn <- c(Dn, fit1$x.fit$Dn)
#    BIC <- c(BIC, fit1$x.fit$BIC)
#    fit1 <- fit.FSD(xh, dist='pd');
#    Dn.Zipf <- c(Dn.Zipf, fit1$x.fit$Dn.Zipf)
#    Dn <- c(Dn, fit1$x.fit$Dn)
#    BIC <- c(BIC, fit1$x.fit$BIC)
#    fit1 <- fit.FSD(xh, dist='gpd');
#    Dn.Zipf <- c(Dn.Zipf, fit1$x.fit$Dn.Zipf)
#    Dn <- c(Dn, fit1$x.fit$Dn)
#    BIC <- c(BIC, fit1$x.fit$BIC)
#    fit1 <- fit.FSD(xh, dist='gld');
#    Dn.Zipf <- c(Dn.Zipf, fit1$x.fit$Dn.Zipf)
#    Dn <- c(Dn, fit1$x.fit$Dn)
#    BIC <- c(BIC, fit1$x.fit$BIC)
#}

#RES <- data.frame(Year=Year,Dist=DIST,Dn.Zipf=Dn.Zipf,Dn=Dn,BIC=BIC)
#save(RES, file='tbl5.Rdata')
#load(file='tbl5.Rdata')

#dist0 <- c("LN","PD","GPD","GLD")
#(out <- mysummary(RES,dist=dist0))
#require(xtable)
#xtable(out[c(3,2,1),],digits=c(0,4,4,4,4,0,0,0))


## Figure 5  ****************************************************

#xy <- tmp <- ImportFSD(Firm2);
#out1 <- fit.FSD(xy$xy, breaks=xy$breaks,dist=c("EWD","GPD"));out1
#out2 <- fit.FSD(xy$xy, breaks=xy$breaks,dist=c("EWD","GLD"));out2
#out3 <- fit.FSD(xy$xy, breaks=xy$breaks,dist=c("GLD","GPD"));out3
#out4 <- fit.FSD(xy$xy, breaks=xy$breaks,dist=c("GLD","GLD"));out4

#postscript(file="fig5.eps",paper='letter')

#par(mfrow=c(2,2))
#out <- out1
#res2 <- plot(out,grid.size=40,nlevels=30,
#             ylim=c(0,15),xlim=c(0,30),
#             xlab="Firm Age",ylab="Firm Size",
#             main="(a) Contour Plot -- (EWD+GPD)")

#out <- out2
#res2 <- plot(out,grid.size=40,nlevels=30,
#             ylim=c(0,15),xlim=c(0,30),
#             xlab="Firm Age",ylab="Firm Size",
#             main="(b) Contour Plot -- (EWD+GLD)")
#out <- out3
#res2 <- plot(out,grid.size=40,nlevels=30,
#             ylim=c(0,15),xlim=c(0,30),
#             xlab="Firm Age",ylab="Firm Size",
#             main="(c) Contour Plot -- (GLD+GPD)")
#
#out <- out4
#res2 <- plot(out,grid.size=40,nlevels=30,
#             ylim=c(0,15),xlim=c(0,30),
#             xlab="Firm Age",ylab="Firm Size",
#             main="(d) Contour Plot -- (GLD+GLD)")
##dev.off()


## Figure 6  ****************************************************
#xy2 <- tmp <- ImportFSD(Firm2);

## this is an example showing how to use partial data to fit FSD.
## get marginal frequency distribution for firm age:
#(X <- apply(xy2$xy,1,sum));
#brks.age <- c(0,1,2,3,4,5,6,11,16,21,26,38,Inf)
## get marginal frequency distribution for firm size:
#(Y <- apply(xy2$xy,2,sum));
#(Y <- Y[-1])
#brks.size <- c(5,10,20,50,100,250,500,1000,2500,5000,10000,Inf)
#mxy2 <- xy2$xy[,-1]

#(fitx1 <- fit.FSD(X, breaks=brks.age, dist="ewd"))
#(fitx2 <- fit.FSD(X, breaks=brks.age, dist="gld"))
#(fity1 <- fit.FSD(Y, breaks=brks.size, dist="gld"))
#(fity2 <- fit.FSD(Y, breaks=brks.size, dist="pd"))
#(fity3 <- fit.FSD(Y, breaks=brks.size, dist="gpd"))

#(out11 <- fit.Copula(fitx1, fity1, mxy2))
#(out12 <- fit.Copula(fitx1, fity2, mxy2))
#(out21 <- fit.Copula(fitx2, fity1, mxy2))
#(out22 <- fit.Copula(fitx2, fity2, mxy2))
#(out13 <- fit.Copula(fitx1, fity3, mxy2))
#(out23 <- fit.Copula(fitx2, fity3, mxy2))

##postscript(file="fig6.eps",paper='letter')

#par(mfrow=c(1,1))
#plot(out11,grid.size=40,nlevels=20,lty=2,
#     ylim=c(4.8,16),xlim=c(0,18),
#     xlab="Firm Age",ylab="Firm Size",
#     main="(d) Contour Plot -- (GLD+GLD)")
#plot(out12,grid.size=40,nlevels=30, col=2, plot.new=FALSE)
## or use the command below
## plot(out2,grid.size=50,nlevels=50, col=4, add=TRUE)
##dev.off()


bda documentation built on Sept. 11, 2024, 9:01 p.m.

Related to fitFSD in bda...