R/gompChooseAges.R

Defines functions fertGompPF

Documented in fertGompPF

#' Gompertz PF Fertility Estimation
#'
#' Performs Gompertz PF model for fertility estimation by user's F and P points selection
#'
#' @param ages A vector of starting ages of five-year age groups ranging from 15 to 45 (default = c(15,20,25,30,35,40,45))
#' @param P A vector of mean parities by five-year age group - same groups as 'ages'
#' @param asfr A vector of age-specific fertility rates by five-year age group - same groups as 'ages'
#' @param level TRUE for correction of fertility level using parity data information, false for correction of fertility shape only
#' @param madef Mother's age definition: '0m' for age at birth of child, '12m' for age at survey for 12 months data (default),
#' '24m' for age at survey for 24 months data, '36m' for age at survey for 36 months data
#'
#' @return A list with 3 elements:
#' pf_data data frame with columns ages, P for mean parities, asfr, Fi for cumulate fertility estimated from Brass coefficients,PF for ratios P/F and adj_asfr for adjusted asfr;
#' tfr_unadj for unadjusted total fertility rate estimate;
#' and tfr_adj for adjusted total fertility rate estimate by applying the selected age-group PF ratio
#' @export
#' @source
#' Brass W, AJ. 1968. Coale Methods  of  analysis  and  estimation.  In:  BRASS,  W.  et  al.  (Ed.).  The demography of tropical Africa. 1. ed. New Jersey: Princeton University Press, p. 88-139.
#' Brass W. 1975. Methods for Estimating Fertility and Mortality from Limited and Defected Data. North Carolina: Carolina Population Center.
#' @examples
#' ## Malawi 2008 Census data:
#' ages_ma = c(15, 20, 25, 30, 35, 40, 45)
#' asfr_ma = c(0.111, 0.245, 0.230, 0.195, 0.147, 0.072, 0.032)
#' P_ma    = c(0.283, 1.532, 2.849, 4.185, 5.214, 6.034, 6.453)
#' fertGompPF( ages            = ages_ma,
#'             asfr            = asfr_ma,
#'             P               = P_ma,
#'             level           = TRUE,
#'             madef           = '24m',
#'             sel.ages        = c( 20, 25, 30, 35, 40, 45 ),
#'             plot.diagnostic = FALSE)
#'

fertGompPF <-
  function( ages            = seq( 15, 45, 5 ),
           asfr,
           P               = NULL,
           level           = FALSE,
           madef           = '12m'
           ){


    # 1. Adjust the inputs into the correct form for the method
    adjustGompInput <-
      function( ages,
               asfr,
               P,      # default = null
               level,  # default = FALSE
               madef   # default = '12m'
               ){


        # 1.1 Check if level = TRUE and P is provided
        if (level & is.null( P )){
          stop('Please provide vector P for mean number of children ever born by women age group')
        }

        # 1.2 Check if input lengths match
        if (level){

          stopifnot( all.equal( length(ages), length(P), length(asfr)) )

          # 1.3. Adjust data inputs for Gompertz application if asfr and P vectors are given for ages 15+
          if(length(asfr) == 7){
            asfr <-
              c( NA, asfr )
          }

          if(length(P) == 7){
            Pvec <-
              c( NA, P )
          }

        } else{

          stopifnot( all.equal( length(ages), length(asfr)) )

          # 1.3. Adjust data inputs for Gompertz application if asfr vector is given for ages 15+
          if(length(asfr) == 7){
            asfr <-
              c( NA, asfr )
          }

          if ( is.null( P ) ){

            Pvec <-
              rep( NA, 8 )

          } else {

            Pvec <-
              c( NA, P)

          }

        }

        # 1.4. Provide age groups and respective lower bound, upper bound and age.shift
        age.lb <-
          seq( 10, 45, 5)

        age.ub <-
          seq( 15, 50, 5)

        age.group <-
          c( "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49" )

        if ( madef == '0m'){
          age.shift <-
            age.ub
        } else if ( madef == '12m' ){
          age.shift <-
            age.ub - 0.5
        } else if ( madef == '24m' ){
          age.shift <-
            age.ub - 1
        } else if ( madef == '36m' ){
          age.shift <-
            age.ub - 1.5
        } else {
          stop( 'Incorrect madef entry! Try either 0m, 12m, 24m or 36m' )
        }

        # 1.5. return adjusted data for gompertz method
        gomp.dat <-
          data.frame(
            age.group,
            age.lb,
            age.ub,
            age.shift,
            asfr,
            P = Pvec
          )

        if ( is.null(P) ){
          gomp.dat <-
            gomp.dat[ , - 6 ]
        }

        return(gomp.dat)
      }

    inputGomp.dat <-
      adjustGompInput(
        ages  = ages,
        asfr  = asfr,
        P     = P,
        level = level,
        madef = madef
      )

    # 2. Compute F gompits for estimation of ex, zx and gx points

    F.GompitCalc <-
      function(
               age.group,
               age.shift,
               age.noshift,
               asfr
               ){

        std.zaba <- DemoToolsData::std.zaba

        # 2.1 Merge ages and asfr data with correspondent zaba std Fx value for age shift defined in madef
        fgomp.dat <-
          merge(
            data.frame(
              age.group,
              age.shift,
              age.noshift,
              asfr
            ),
            std.zaba[ , c( 'age', 'Fx' ) ],
            by.x = 'age.shift',
            by.y = 'age',
            all.x = TRUE
          )

        names(fgomp.dat)[ncol(fgomp.dat)] <-
          'Fx.std'

        # 2.2 Merge ages and asfr data with correspondent zaba std Fx value for no shift in ages
        fgomp.dat <-
          merge(
            fgomp.dat,
            std.zaba[ , c( 'age', 'Fx' ) ],
            by.x = 'age.noshift',
            by.y = 'age',
            all.x = TRUE
          )

        names(fgomp.dat)[ncol(fgomp.dat)] <-
          'Fx.stdnoshift'

        # 2.3 Generate estimates for gx, ex and zx for F points

        ## A. First Gompit
        fgomp.dat$Yf.std <-
          - log( - log( fgomp.dat$Fx.std ) )

        ## B. Ratio of adjacent groups Fx/Fx+5
        fgomp.dat$Fx_x5.std <-
          NA

        for(i in 1:7){
          fgomp.dat$Fx_x5.std[i] <-
            fgomp.dat$Fx.std[i] / fgomp.dat$Fx.std[i+1]
        }

        ## C. Gompit of ratios equal phi
        fgomp.dat$phi <-
          - log( - log( fgomp.dat$Fx_x5.std ) )

        ## D. Parameter phi' = g(x)
        ## phi'= (exp(Ys.x)*Ys.x+5)-Ys.x*exp(Ys.x+5))/(exp(Ys.x)-exp(Ys.x+5))

        fgomp.dat$phi.1 <-
          NA

        for(i in 1:7){
          fgomp.dat$phi.1[i] <-
            ( ( exp( fgomp.dat$Yf.std[i] ) * fgomp.dat$Yf.std[i+1] ) - fgomp.dat$Yf.std[i] * exp( fgomp.dat$Yf.std[i+1] ) ) /
            ( exp( fgomp.dat$Yf.std[i] ) - exp( fgomp.dat$Yf.std[i+1] ) )
        }

        fgomp.dat$gx <-
          round( fgomp.dat$phi.1, 4 )

        ## E. Parameter phi''
        ## Parameter phi'' - only for 15-30 years old, the mean gives value of parameter c
        ## phi''= (Ys.x-Ys.x+5)?*exp(Ys.x+Ys.x+5)/(exp(Ys.x)-exp(Ys.x+5))

        fgomp.dat$phi.2 <-
          NA

        for(i in 2:4){
          fgomp.dat$phi.2[i] <-
            ( ( fgomp.dat$Yf.std[i] - fgomp.dat$Yf.std[i+1] ) ^ 2 * exp( fgomp.dat$Yf.std[i] + fgomp.dat$Yf.std[i+1] ) ) /
            ( exp( fgomp.dat$Yf.std[i] ) - exp( fgomp.dat$Yf.std[i+1] ) ) ^ 2
        }

        fgomp.dat$c.F <-
          round( mean( fgomp.dat$phi.2, na.rm = T ), 4 )

        ## F. e(x) = difference between ratios gompit (phi) and phi'
        fgomp.dat$ex <-
          round( fgomp.dat$phi - fgomp.dat$phi.1, 4 )

        ## G. Parameter z(x) - based on observed data

        ## cumulate asfr
        fgomp.dat$Fx.obs <-
          c( 0, 5 * cumsum( stats::na.omit( fgomp.dat$asfr ) ) )

        ## Fx ratios
        fgomp.dat$Fx_x5.obs <-
          NA

        for(i in 1:7){
          fgomp.dat$Fx_x5.obs[i] <-
            fgomp.dat$Fx.obs[i] / fgomp.dat$Fx.obs[i+1]
        }

        ## estimating z(x), gompi from cumulated ratios
        fgomp.dat$zx <-
          round( c( NA, ( -log( -log( fgomp.dat$Fx_x5.obs[2:7] ) ) ), NA ), 4 )

        ## H. Return selected arguments
        fgomp.dat <-
          fgomp.dat[ , c( 'age.group', 'age.noshift', 'age.shift', 'asfr', 'Fx.std', 'Fx.stdnoshift', 'Fx.obs', 'Yf.std', 'gx', 'ex', 'zx', 'c.F')]

        return(fgomp.dat)
      }

    Gomp.Fdat <-
      F.GompitCalc(
        age.group   = inputGomp.dat$age.group,
        age.shift   = inputGomp.dat$age.shift,
        age.noshift = inputGomp.dat$age.ub,
        asfr        = inputGomp.dat$asfr
      )

    # 3. Compute P gompits for estimation of ei, zi and gi points

    P.GompitCalc <-
      function(
               age.group,
               age.noshift,
               P
               ){

        # 3.1 Merge ages and P data with correspondent zaba std Px_x5 value for exact ages
        pgomp.dat <-
          merge(
            data.frame(
              age.group,
              age.noshift,
              P
            ),
            std.zaba[, c( 'age', 'Px_x5' )],
            by.x = 'age.noshift',
            by.y = 'age',
            all.x = TRUE
          )

        names(pgomp.dat)[ncol(pgomp.dat)] <-
          'P.std'

        # 3.2 Generate estimates for gi, ei and zi for P points

        ## A. First Gompi
        pgomp.dat$Yp.std <-
          - log( - log( pgomp.dat$P.std ) )

        ## B. Ratios P(i)/P(i+1)
        pgomp.dat$P.ratio <-
          NA

        for(i in 1:7){
          pgomp.dat$P.ratio[i] <-
            pgomp.dat$P.std[i] / pgomp.dat$P.std[i+1]
        }

        # For last age group, Pi/1
        pgomp.dat$P.ratio[ nrow( pgomp.dat ) ] <-
          pgomp.dat$P.std[ nrow( pgomp.dat ) ] / 1

        ## C. Gompit of ratios equal phi
        pgomp.dat$phi <-
          - log( - log( pgomp.dat$P.ratio ) )

        ## D. parameter phi' = gi
        ## phi'= (exp(Ys.i)*Ys.i+1)-Ys.i*exp(Ys.i+1))/(exp(Ys.i)-exp(Ys.i+1))

        pgomp.dat$phi.1 <-
          NA

        for(i in 1:7){
          pgomp.dat$phi.1[i] <-
            ( ( exp( pgomp.dat$Yp.std[i] ) * pgomp.dat$Yp.std[i+1]) - pgomp.dat$Yp.std[i] * exp( pgomp.dat$Yp.std[i+1] ) ) /
            ( exp( pgomp.dat$Yp.std[i] ) - exp( pgomp.dat$Yp.std[i+1] ) )
        }

        pgomp.dat$phi.1[8] <-
          pgomp.dat$Yp.std[8]

        pgomp.dat$gi <-
          round( pgomp.dat$phi.1, 4 )

        ## E. Parameter phi'' - only for 15-30 years old, the mean gives value of parameter c
        ## phi''= (Ys.x-Ys.x+5)?*exp(Ys.x+Ys.x+5)/(exp(Ys.x)-exp(Ys.x+5))?

        pgomp.dat$phi.2 <-
          NA

        for(i in 2:4){
          pgomp.dat$phi.2[i]  <-
            ( ( pgomp.dat$Yp.std[i]- pgomp.dat$Yp.std[i+1]) ^ 2 * exp( pgomp.dat$Yp.std[i] + pgomp.dat$Yp.std[i+1] ) ) /
            ( exp( pgomp.dat$Yp.std[i] ) - exp( pgomp.dat$Yp.std[i+1] ) ) ^ 2
        }

        pgomp.dat$c.P <-
          round( mean( pgomp.dat$phi.2, na.rm = T ), 4 )

        ## F. e(i) = difference between ratios gompit (phi) and phi'
        pgomp.dat$ei <-
          round( pgomp.dat$phi -  pgomp.dat$phi.1, 4 )

        ## G. Parameter z(i) - based on observed data

        # x / x+5 ratios
        pgomp.dat$Px_x5.obs <-
          NA

        for(i in 1:7){
          pgomp.dat$Px_x5.obs[i] <-
            pgomp.dat$P[i] / pgomp.dat$P[i+1]
        }

        # estimating z(i), gompi from P ratios
        pgomp.dat$zi <-
          round( c( NA, ( - log( -log( pgomp.dat$Px_x5.obs[2:7] ) ) ), NA ), 4 )

        ## H. Return selected arguments
        pgomp.dat <-
          pgomp.dat[ , c( 'age.group', 'age.noshift', 'P', 'P.std', 'Yp.std', 'gi', 'ei', 'zi', 'c.P')]

        return(pgomp.dat)
      }

    if ( level ){
      Gomp.Pdat <-
        P.GompitCalc(
          age.group   = inputGomp.dat$age.group,
          age.noshift = inputGomp.dat$age.ub,
          P           = inputGomp.dat$P
        )
    }

    # 4. Fit Alpha and Beta values

    fitGompPF <-
      function(
               age.group,
               age.ub,
               gi = NULL,
               ei = NULL,
               zi = NULL,
               gx,
               ex,
               zx,
               level = FALSE,
               c.F,
               c.P = NA
               ){

        p.flag <-
          level & !is.null(gi) & !is.null(ei) & !is.null(zi)

        fitGomp.Pdat <-
          NULL

        # 4.1. Set data
        if( p.flag ){
          age.group <-
            rep( age.group, 2 )

          age.ub <-
            rep( age.ub, 2 )
        }

        fitGomp.dat <-
          data.frame(
            age.group = age.group,
            age.ub    = age.ub,
            g         = round( c( gx, gi), 4 ),
            e         = round( c( ex, ei), 4 ),
            z         = round( c( zx, zi), 4 ),
            point.lab = c( rep( 'F-Points', length(gx) ), rep( 'P-Points', length(gi) ) )
          )

        fitGomp.dat <-
          fitGomp.dat[ !fitGomp.dat$age.ub %in% c( 15, 50 ) , ] # filter extreme ages

        fitGomp.dat$y <-
          fitGomp.dat$z - fitGomp.dat$e

        fitGomp.dat$x <-
          fitGomp.dat$g

        # 4.2. Fit alpha and beta for all F and P points for initial graph

        Fall.model <-
          lm(
            y ~ x,
            data = fitGomp.dat[ fitGomp.dat$point.lab == 'F-Points', ]
          )

        Fall.beta  <-
          Fall.model$coefficients[2]

        Fall.intercept <-
          Fall.model$coefficients[1]

        if ( p.flag ){

          FPall.model <-
            lm(
              y ~ x,
              data = fitGomp.dat
            )

          FPall.beta  <-
            FPall.model$coefficients[2]

          FPall.intercept <-
            FPall.model$coefficients[1]

          Pall.model <-
            lm(
              y ~ x,
              data = fitGomp.dat[ fitGomp.dat$point.lab == 'P-Points', ]
            )

          Pall.beta  <-
            Pall.model$coefficients[2]

          Pall.intercept <-
            Pall.model$coefficients[1]
        }

        # 4.3 Plot diagnostic graphs for points selection

        pointsel.incomplete <- T

        grDevices::x11( width = 6, height = 6 )
        plot(
          x    = fitGomp.dat[ fitGomp.dat$point.lab == "F-Points", ]$x,
          y    = fitGomp.dat[ fitGomp.dat$point.lab == "F-Points", ]$y,
          col  = 'tomato3',
          pch  = 1,
          xlab ='g()',
          ylab = 'z()-e()',
          cex = 1.5,
          xlim = range( fitGomp.dat$x ),
          ylim = range( fitGomp.dat$y ),
          main = 'Select sequential F points for fertility estimation\nAttention: 0.80 < beta < 1.25 and |alpha| < 0.3'
        )
        graphics::points(
                    x   = fitGomp.dat[ fitGomp.dat$point.lab == "P-Points", ]$x,
                    y   = fitGomp.dat[ fitGomp.dat$point.lab == "P-Points", ]$y,
                    col = 'skyblue',
                    pch = 0,
                    cex = 1.5
                  )
        graphics::abline(
                    a   = FPall.intercept,
                    b   = FPall.beta,
                    col = "gray65",
                    lwd = 0.75
                  )
        graphics::abline(
                    a   = Fall.intercept,
                    b   = Fall.beta,
                    col = "tomato3",
                    lty = "dashed",
                    lwd = 1.5
                  )
        graphics::abline(
                    a   = Pall.intercept,
                    b   = Pall.beta,
                    col = "skyblue",
                    lwd = 1.5
                  )
        graphics::grid( col = "gray70", lty = "dotted", equilogs = TRUE)
        graphics::legend(
                    'bottomright',
                    c( 'F-Points', 'P-Points' ),
                    col = c( 'tomato3', 'skyblue' ),
                    pch = c( 19, 15 ),
                    cex = 1.2,
                    bty = "n",
                    horiz = T
                  )

        while( pointsel.incomplete ){

          # select F-Points

          fitGomp.Fdat <-
            fitGomp.dat[ fitGomp.dat$point.lab == "F-Points", ]

          sel.Fpoints <-
            identify(
              x = fitGomp.Fdat$x,
              y = fitGomp.Fdat$y,
              labels = fitGomp.Fdat$age.group
            )

          Fsel.model <-
            lm(
              y ~ x,
              data = fitGomp.Fdat[ sel.Fpoints , ]
            )

          Fsel.beta  <-
            Fsel.model$coefficients[2]

          Fsel.intercept  <-
            Fsel.model$coefficients[1]

          Fsel.alpha <-
            Fsel.intercept - ( Fsel.beta - 1 ) ^ (2) * c.F / 2

          # add selected points to plot
          graphics::points(
                      x   = fitGomp.Fdat[ sel.Fpoints, ]$x,
                      y   = fitGomp.Fdat[ sel.Fpoints, ]$y,
                      col = 'tomato3',
                      pch = 19,
                      cex = 1.5
                    )
          graphics::abline(
                      a = Fsel.intercept,
                      b = Fsel.beta,
                      col = "tomato3",
                      lty = "dashed",
                      lwd = 1.5
                    )
          graphics::mtext(
                      side = 3,
                      line = -1,
                      text = paste0('   alpha.F = ', round( Fsel.alpha, 3 ),
                                    '; beta.F = ', round( Fsel.beta, 3 ),
                                    '; intercept.F = ', round( Fsel.intercept, 3 ) ),
                      cex = 1.0,
                      adj = 0
                    )
          graphics::grid( col = "gray70", lty = "dotted", equilogs = TRUE)
          graphics::legend(
                      'bottomright',
                      c( 'F-Points', 'P-Points' ),
                      col = c( 'tomato3', 'skyblue' ),
                      pch = c( 19, 15 ),
                      cex = 1.2,
                      bty = "n",
                      horiz = T
                    )

          if ( p.flag ){
            # add P points to plot if there are any
            plot(
              x    = fitGomp.dat[ fitGomp.dat$point.lab == "P-Points", ]$x,
              y    = fitGomp.dat[ fitGomp.dat$point.lab == "P-Points", ]$y,
              col = 'skyblue',
              pch  = 0,
              xlab ='g()',
              ylab = 'z()-e()',
              cex = 1.5,
              xlim = range( fitGomp.dat$x ),
              ylim = range( fitGomp.dat$y ),
              main = 'SELECT P POINTS\n 0.80 < beta < 1.25 and |alpha| < 0.3'
            )
            graphics::points(
                        x   = fitGomp.dat[ fitGomp.dat$point.lab == "F-Points", ]$x,
                        y   = fitGomp.dat[ fitGomp.dat$point.lab == "F-Points", ]$y,
                        col = 'tomato3',
                        pch = 1,
                        cex = 1.5
                      )
            graphics::points(
                        x   = fitGomp.Fdat[ sel.Fpoints, ]$x,
                        y   = fitGomp.Fdat[ sel.Fpoints, ]$y,
                        col = 'tomato3',
                        pch = 19,
                        cex = 1.5
                      )
            graphics::abline(
                        a = Fsel.intercept,
                        b = Fsel.beta,
                        col = "tomato3",
                        lty = "dashed",
                        lwd = 1.5
                      )
            graphics::mtext(
                        side = 3,
                        line = -1,
                        text = paste0('   alpha.F = ', round( Fsel.alpha, 3 ),
                                      '; beta.F = ', round( Fsel.beta, 3 ),
                                      '; intercept.F = ', round( Fsel.intercept, 3 ) ),
                        cex = 1.0,
                        adj = 0
                      )
            graphics::grid( col = "gray70", lty = "dotted", equilogs = TRUE)
            graphics::legend(
                        'bottomright',
                        c( 'F-Points', 'P-Points' ),
                        col = c( 'tomato3', 'skyblue' ),
                        pch = c( 19, 15 ),
                        cex = 1.2,
                        bty = "n",
                        horiz = T
                      )


            # select P-Points
            fitGomp.Pdat <-
              fitGomp.dat[ fitGomp.dat$point.lab == "P-Points", ]

            sel.Ppoints <-
              identify(
                x = fitGomp.Pdat$x,
                y = fitGomp.Pdat$y,
                labels = fitGomp.Pdat$age.group
              )

            Psel.model <-
              lm(
                y ~ x,
                data = fitGomp.Pdat[ sel.Ppoints , ]
              )

            Psel.beta  <-
              Psel.model$coefficients[2]

            Psel.intercept  <-
              Psel.model$coefficients[1]

            Psel.alpha <-
              Psel.intercept - ( Psel.beta - 1 ) ^ (2) * c.P / 2

            # add selected P points to plot
            graphics::points(
                        x   = fitGomp.Pdat[ sel.Ppoints, ]$x,
                        y   = fitGomp.Pdat[ sel.Ppoints, ]$y,
                        col = 'skyblue',
                        pch = 15,
                        cex = 1.5
                      )
            graphics::abline(
                        a = Psel.intercept,
                        b = Psel.beta,
                        col = "skyblue",
                        lty = 1,
                        lwd = 1.5
                      )
            graphics::mtext(
                        side = 3,
                        line = -2,
                        text = paste0('   alpha.P = ', round( Psel.alpha, 3 ),
                                      '; beta.P = ', round( Psel.beta, 3 ),
                                      '; intercept.P = ', round( Psel.intercept, 3 ) ),
                        cex = 1.0,
                        adj = 0
                      )
            graphics::grid( col = "gray70", lty = "dotted", equilogs = TRUE)
            graphics::legend(
                        'bottomright',
                        c( 'F-Points', 'P-Points' ),
                        col = c( 'tomato3', 'skyblue' ),
                        pch = c( 19, 15 ),
                        cex = 1.2,
                        bty = "n",
                        horiz = T
                      )

          }

          # combined p and f data selection
          fitGomp.FPdat <-
            rbind(
              fitGomp.Fdat[ sel.Fpoints, ],
              fitGomp.Pdat[ sel.Ppoints, ]
            )

          # selected ages
          sel.ages <-
            fitGomp.FPdat$age.ub

          # new model and parameters
          FPsel.model <-
            lm(
              y ~ x,
              data = fitGomp.FPdat
            )

          FPsel.beta  <-
            FPsel.model$coefficients[2]

          FPsel.intercept  <-
            FPsel.model$coefficients[1]

          FPsel.alpha <-
            FPsel.intercept - ( FPsel.beta - 1 ) ^ (2) * mean( c.F, c.P, na.rm = TRUE ) / 2

          graphics::abline(
                      a = FPsel.intercept,
                      b = FPsel.beta,
                      col = "black",
                      lty = 3,
                      lwd = 1.5
                    )
          graphics::mtext(
                      side = 3,
                      line = -3,
                      text = paste0('   alpha = ', round( FPsel.alpha, 3 ), '; beta = ', round( FPsel.beta, 3 ) ),
                      cex = 1,
                      adj = 0
                    )


          if ( abs( FPsel.alpha ) > 0.3 ){
            warning('Caution! Absolute alpha estimate value greater than 0.3!')
          }

          if ( FPsel.beta > 1.25 | FPsel.beta < 0.80  ){
            warning('Caution! Beta estimate value must be within the interval (0.80, 1.25)!')
          }

          pointsel.check <-  "x"

          while ( pointsel.check != "y" | pointsel.check != "n"){

            # check if customer is satisfied with point selection
            pointsel.check <-
              readline( "Are you done with point selection?(y=yes/n=no): \n" )

            if ( pointsel.check == "y" ){
              pointsel.incomplete <-  FALSE
              break
            }

            if ( pointsel.check == "n" ){

              pointsel.incomplete <- TRUE

              cat( 'New F and P points selection\n' )
              # new plot for point selection
              graphics.off()
              grDevices::x11( width = 6, height = 6 )
              plot(
                x    = fitGomp.dat[ fitGomp.dat$point.lab == "F-Points", ]$x,
                y    = fitGomp.dat[ fitGomp.dat$point.lab == "F-Points", ]$y,
                col  = 'tomato3',
                pch  = 1,
                xlab ='g()',
                ylab = 'z()-e()',
                cex = 1.5,
                xlim = range( fitGomp.dat$x ),
                ylim = range( fitGomp.dat$y ),
                main = 'Select sequential F points for fertility estimation\nAttention: 0.80 < beta < 1.25 and |alpha| < 0.3'
              )
              graphics::points(
                          x   = fitGomp.dat[ fitGomp.dat$point.lab == "P-Points", ]$x,
                          y   = fitGomp.dat[ fitGomp.dat$point.lab == "P-Points", ]$y,
                          col = 'skyblue',
                          pch = 0,
                          cex = 1.5
                        )
              graphics::abline(
                          a   = FPall.intercept,
                          b   = FPall.beta,
                          col = "gray65",
                          lwd = 0.75
                        )
              graphics::abline(
                          a   = Fall.intercept,
                          b   = Fall.beta,
                          col = "tomato3",
                          lty = "dashed",
                          lwd = 1.5
                        )
              graphics::abline(
                          a   = Pall.intercept,
                          b   = Pall.beta,
                          col = "skyblue",
                          lwd = 1.5
                        )
              graphics::grid( col = "gray70", lty = "dotted", equilogs = TRUE)
              graphics::legend(
                          'bottomright',
                          c( 'F-Points', 'P-Points' ),
                          col = c( 'tomato3', 'skyblue' ),
                          pch = c( 19, 15 ),
                          cex = 1.2,
                          bty = "n",
                          horiz = T
                        )
              break
            }

            if ( pointsel.check != "y" | pointsel.check != "n" ){
              cat( "Try again! y for yes or n for no!\n" )
            }

          }
        }

        rangeAll.x <-
          range( fitGomp.dat$x )

        rangeAll.y <-
          range( fitGomp.dat$y )

        if ( level ){

          grDevices::x11( width = 9, height = 6)
          graphics::par( mfrow = c( 1, 2 ) )

          ## A.1 All F points
          plot(
            x    = fitGomp.dat[ fitGomp.dat$point.lab == 'F-Points' ,]$x,
            y    = fitGomp.dat[ fitGomp.dat$point.lab == 'F-Points' ,]$y,
            pch  = 19,
            col  = 'skyblue',
            xlab = 'g()',
            ylab = 'z()-e()',
            xlim = rangeAll.x,
            ylim = rangeAll.y
          )
          graphics::abline( reg = Fall.model, col = 'skyblue' , lty = 5)

          ## A.2 All P points
          graphics::points(
                      x = fitGomp.dat[ fitGomp.dat$point.lab == 'P-Points' ,]$x,
                      y = fitGomp.dat[ fitGomp.dat$point.lab == 'P-Points' ,]$y,
                      pch = 15,
                      col = 'tomato3'
                    )
          graphics::abline( reg = Pall.model, col = 'tomato3' , lty = 3)
          graphics::legend(
                      'bottomright',
                      legend = c( 'F-points', 'P-points' ),
                      lty    = c(  5,  3 ),
                      pch    = c( 19, 15 ),
                      col    = c( 'skyblue', 'tomato3' ),
                      bty    = 'n'
                    )
          graphics::grid()
          graphics::mtext(
                      side = 3,
                      line = 2.35,
                      adj  = 0,
                      cex  = 1.15,
                      'Figure 1: All F Points and P points'
                    )
          graphics::mtext(
                      side = 3,
                      line = 0.60,
                      adj  = 0,
                      cex  = 0.75,
                      paste0( 'F-points linear: y(x) = ', round( Fall.intercept, 4) , ' + ', round( Fall.beta, 4 ) , ' * x\n',
                             'P-points linear: y(x) = ', round( Pall.intercept, 4) , ' + ', round( Pall.beta, 4 ) , ' * x' )
                    )
          graphics::text(
                      x = fitGomp.dat[ fitGomp.dat$point.lab == 'F-Points', ]$x,
                      y = fitGomp.dat[ fitGomp.dat$point.lab == 'F-Points', ]$y,
                      labels = fitGomp.dat[ fitGomp.dat$point.lab == 'F-Points', ]$age.group,
                      cex = 0.75,
                      pos = 4,
                      col = 'skyblue'
                    )
          graphics::text(
                      x = fitGomp.dat[ fitGomp.dat$point.lab == 'P-Points', ]$x,
                      y = fitGomp.dat[ fitGomp.dat$point.lab == 'P-Points', ]$y,
                      labels = fitGomp.dat[ fitGomp.dat$point.lab == 'P-Points', ]$age.group,
                      cex = 0.75,
                      pos = 4,
                      col = 'tomato3'
                    )

          ## B.1 Selected F points
          plot(
            x    = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'F-Points', ]$x,
            y    = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'F-Points', ]$y,
            pch  = 19,
            col  = 'skyblue',
            xlab = 'g()',
            ylab = 'z()-e()',
            xlim = rangeAll.x,
            ylim = rangeAll.y
          )
          graphics::abline( reg = Fsel.model, col = 'skyblue' , lty = 5)

          ## B.2 Selected P points
          graphics::points(
                      x = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'P-Points', ]$x,
                      y = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'P-Points', ]$y,
                      pch = 15,
                      col = 'tomato3'
                    )
          graphics::abline( reg = Psel.model, col = 'tomato3' , lty = 3)

          ## B.3 Combined P and F points
          graphics::abline( reg = FPsel.model, col = 'black' , lty = 1, lwd = 0.5)
          graphics::legend(
                      'bottomright',
                      legend = c( 'F-points', 'P-points', 'Combined F and P' ),
                      lty    = c(  5,  3 , 1),
                      pch    = c( 19, 15 , NA),
                      col    = c( 'skyblue', 'tomato3', 'black' ),
                      bty    = 'n'
                    )
          graphics::grid()
          graphics::mtext(
                      side = 3,
                      line = 2.35,
                      adj  = 0,
                      cex  = 1.15,
                      'Figure 2: Selected F Points and P points'
                    )
          graphics::mtext(
                      side = 3,
                      line = 0.10,
                      adj  = 0,
                      cex  = 0.75,
                      paste0( 'F-points linear: y(x) = ', round( Fsel.intercept, 4) , ' + ', round( Fsel.beta, 4 ) , ' * x\n',
                             'P-points linear: y(x) = ', round( Psel.intercept, 4) , ' + ', round( Psel.beta, 4 ) , ' * x\n',
                             'Combined F and P linear: y(x) = ', round( FPsel.intercept, 4) , ' + ', round( FPsel.beta, 4 ) , ' * x' )
                    )
          graphics::text(
                      x = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'F-Points', ]$x,
                      y = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'F-Points', ]$y,
                      labels = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'F-Points', ]$age.group,
                      cex = 0.75,
                      pos = 4,
                      col = 'skyblue'
                    )
          graphics::text(
                      x = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'P-Points', ]$x,
                      y = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'P-Points', ]$y,
                      labels = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'F-Points', ]$age.group,
                      cex = 0.75,
                      pos = 4,
                      col = 'tomato3'
                    )

        }

        else{

          grDevices::x11( width = 9, height = 6)
          graphics::par( mfrow = c( 1, 2 ) )

          ## A.1 All F points
          plot(
            x    = fitGomp.dat[ fitGomp.dat$point.lab == 'F-Points' ,]$x,
            y    = fitGomp.dat[ fitGomp.dat$point.lab == 'F-Points' ,]$y,
            pch  = 19,
            col  = 'skyblue',
            xlab = 'g()',
            ylab = 'z()-e()',
            xlim = rangeAll.x,
            ylim = rangeAll.y
          )
          graphics::abline( reg = Fall.model, col = 'skyblue' , lty = 5)
          graphics::grid()
          graphics::mtext(
                      side = 3,
                      line = 2,
                      adj  = 0,
                      cex  = 1.25,
                      'Figure 1: All F Points'
                    )
          graphics::mtext(
                      side = 3,
                      line = 0.75,
                      adj  = 0,
                      cex  = 1,
                      paste0( 'F-points linear: y(x) = ', round( Fall.intercept, 4) , ' + ', round( Fall.beta, 4 ) , ' * x')
                    )
          graphics::text(
                      x = fitGomp.dat[ fitGomp.dat$point.lab == 'F-Points', ]$x,
                      y = fitGomp.dat[ fitGomp.dat$point.lab == 'F-Points', ]$y,
                      labels = fitGomp.dat[ fitGomp.dat$point.lab == 'F-Points', ]$age.group,
                      cex = 0.75,
                      pos = 4,
                      col = 'skyblue'
                    )

          ## B.1 Selected F points
          plot(
            x    = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'F-Points', ]$x,
            y    = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'F-Points', ]$y,
            pch  = 19,
            col  = 'skyblue',
            xlab = 'g()',
            ylab = 'z()-e()',
            xlim = rangeAll.x,
            ylim = rangeAll.y
          )
          graphics::abline( reg = Fsel.model, col = 'skyblue' , lty = 5)
          graphics::grid()
          graphics::mtext(
                      side = 3,
                      line = 2,
                      adj  = 0,
                      cex  = 1.25,
                      'Figure 2: Selected F Points'
                    )
          graphics::mtext(
                      side = 3,
                      line = 0.75,
                      adj  = 0,
                      cex  = 1,
                      paste0( 'F-points linear: y(x) = ', round( Fsel.intercept, 4) , ' + ', round( Fsel.beta, 4 ) , ' * x')
                    )
          graphics::text(
                      x = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'F-Points', ]$x,
                      y = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'F-Points', ]$y,
                      labels = fitGomp.FPdat[ fitGomp.FPdat$point.lab == 'F-Points', ]$age.group,
                      cex = 0.75,
                      pos = 4,
                      col = 'skyblue'
                    )
        }

        # 4.5 Create output data.frame for regression and compute RMSE
        fitGomp.reg <-
          fitGomp.FPdat

        fitGomp.reg$RMSE <-
          round ( ( ( FPsel.intercept + FPsel.beta * fitGomp.reg$x ) - fitGomp.reg$y ) ^ 2, 4 )

        # 4.6 Create coefficient parameters

        if ( level ){
          coeffs.Gomp <-
            data.frame(
              F.beta  = Fsel.beta,
              F.alpha = Fsel.alpha,
              F.intercept = Fsel.intercept,
              P.beta  = Psel.beta,
              P.alpha = Psel.alpha,
              P.intercept = Psel.intercept,
              FP.beta  = FPsel.beta,
              FP.alpha = FPsel.alpha,
              FP.intercept = FPsel.intercept,
              row.names = NULL
            )
        } else {
          coeffs.Gomp <-
            data.frame(
              F.beta  = Fsel.beta,
              F.alpha = Fsel.alpha,
              F.intercept = Fsel.intercept,
              row.names = NULL
            )
        }

        # 4.7 Create output list with coefficients and point estimates
        out.fitGomp <-
          list(
            coeffs.Gomp = coeffs.Gomp,
            fitGomp.reg = fitGomp.reg
          )

        return(out.fitGomp)
      }

    if ( level ){
      coeffGomp.dat <-
        fitGompPF(
          age.group = inputGomp.dat$age.group,
          age.ub    = inputGomp.dat$age.ub,
          gi        = Gomp.Pdat$gi,
          ei        = Gomp.Pdat$ei,
          zi        = Gomp.Pdat$zi,
          gx        = Gomp.Fdat$gx,
          ex        = Gomp.Fdat$ex,
          zx        = Gomp.Fdat$zx,
          level     = level,
          c.F       = unique( Gomp.Fdat$c.F ),
          c.P       = unique( Gomp.Pdat$c.P )
        )
    } else {
      coeffGomp.dat <-
        fitGompPF(
          age.group = inputGomp.dat$age.group,
          age.ub    = inputGomp.dat$age.ub,
          gx        = Gomp.Fdat$gx,
          ex        = Gomp.Fdat$ex,
          zx        = Gomp.Fdat$zx,
          c.F       = unique( Gomp.Fdat$c.F )
        )
    }

    # 5. Estimate fmx for true ages (no shift) and correction level from beta and alpha

    AntiGompitCalc <-
      function(
               age.group,
               age.shift,
               age.noshift,
               level = FALSE,
               coeffs.Gomp,
               Fx.std,
               Fx.stdnoshift,
               Fx.obs,
               P.std = NULL,
               P.obs = NULL,
               sel.ages
               ){

        F.alpha <-
          coeffs.Gomp$F.alpha

        F.beta <-
          coeffs.Gomp$F.beta

        P.level <-  NULL

        if ( level & !is.null( P.std ) & !is.null( P.obs ) ){

          F.alpha <-
            coeffs.Gomp$FP.alpha

          F.beta <-
            coeffs.Gomp$FP.beta

          Pmodel.dat <-
            data.frame(
              age.group,
              age.noshift,
              P.obs,
              P.std,
              Yp.std = - log( - log( P.std ) )
            )

          Pmodel.dat$Yp.fit <-
            Pmodel.dat$Yp.std * coeffs.Gomp$FP.beta + coeffs.Gomp$FP.alpha

          Pmodel.dat$P.fit <-
            exp( - exp( - Pmodel.dat$Yp.fit ) )

          Pmodel.dat$actual.cumulant <-
            Pmodel.dat$P.obs / Pmodel.dat$P.fit

          P.level <-
            mean( Pmodel.dat$actual.cumulant[ Pmodel.dat$age.noshift %in% sel.ages ] )

        }

        Fmodel.dat <-
          data.frame(
            age.group,
            age.shift,
            age.noshift,
            Fx.obs,
            Fx.std,
            Fx.stdnoshift,
            Yf.std        = - log( - log( Fx.std ) ),
            Yf.stdnoshift = - log( - log( Fx.stdnoshift ) )
          )

        # shifted ages
        Fmodel.dat$Yf.fit <-
          Fmodel.dat$Yf.std * F.beta + F.alpha

        Fmodel.dat$Fx.fit <-
          exp( - exp( - Fmodel.dat$Yf.fit ) )

        Fmodel.dat$actual.cumulant <-
          Fmodel.dat$Fx.obs / Fmodel.dat$Fx.fit

        F.level <-
          mean( Fmodel.dat$actual.cumulant[ Fmodel.dat$age.noshift %in% sel.ages ] )

        FP.level <-
          ifelse ( is.null( P.level ),
                  F.level,
                  P.level
                  )

        Fmodel.dat$fmx <-
          Fmodel.dat$Fx.fit * FP.level / 5

        for( i in 1 : ( nrow( Fmodel.dat ) - 1 ) ){
          Fmodel.dat$fmx[ i + 1 ] <-
            ( Fmodel.dat$Fx.fit[ i + 1 ] - Fmodel.dat$Fx.fit[ i ] ) * FP.level / 5
        }

        Fmodel.dat$fmx <-
          round( Fmodel.dat$fmx, 4 )

        # conventional ages
        Fmodel.dat$Yf.fitnoshift <-
          Fmodel.dat$Yf.stdnoshift * F.beta + F.alpha

        Fmodel.dat$Fx.fitnoshift <-
          exp( - exp( - Fmodel.dat$Yf.fitnoshift ) )

        Fmodel.dat$fmx.noshift <-
          Fmodel.dat$Fx.fitnoshift * FP.level / 5

        for( i in 1 : ( nrow( Fmodel.dat ) - 1 ) ){
          Fmodel.dat$fmx.noshift[ i + 1 ] <-
            ( Fmodel.dat$Fx.fitnoshift[ i + 1 ] - Fmodel.dat$Fx.fitnoshift[ i ] ) * FP.level / 5
        }

        Fmodel.dat$fmx.noshift <-
          round( Fmodel.dat$fmx.noshift, 4 )

        Fmodel.dat$F.level <-
          F.level

        Fmodel.dat$FP.level <-
          FP.level

        outmodel.dat <-
          Fmodel.dat[, c( 'age.group', 'age.shift', 'fmx', 'age.noshift', 'fmx.noshift', 'F.level', 'FP.level' ) ]

        return( outmodel.dat )

      }

    if ( level ){
      AntiGompit.dat <-
        AntiGompitCalc(
          age.group = Gomp.Fdat$age.group,
          age.shift = Gomp.Fdat$age.shift,
          age.noshift = Gomp.Fdat$age.noshift,
          level = level,
          coeffs.Gomp = coeffGomp.dat$coeffs.Gomp,
          Fx.std = Gomp.Fdat$Fx.std,
          Fx.stdnoshift = Gomp.Fdat$Fx.stdnoshift,
          Fx.obs = Gomp.Fdat$Fx.obs,
          P.std = Gomp.Pdat$P.std,
          P.obs = Gomp.Pdat$P,
          sel.ages = sel.ages
        )
    } else {
      AntiGompit.dat <-
        AntiGompitCalc(
          age.group = Gomp.Fdat$age.group,
          age.shift = Gomp.Fdat$age.shift,
          age.noshift = Gomp.Fdat$age.noshift,
          coeffs.Gomp = coeffGomp.dat$coeffs.Gomp,
          Fx.std = Gomp.Fdat$Fx.std,
          Fx.stdnoshift = Gomp.Fdat$Fx.stdnoshift,
          Fx.obs = Gomp.Fdat$Fx.obs,
          sel.ages = Gomp.Fdat$age.ub
        )
    }


    # 6. Estimate PF series
    PFseries.Calc <-
      function(
               age.group,
               age.lb,
               age.ub,
               P.obs,
               F.level,
               coeffs.Gomp
               ){

        PFseries.dat <-
          data.frame(
            age.group,
            age.mid = (age.ub - age.lb) / 2 + age.lb,
            P       = P.obs
          )

        PFseries.dat <-
          merge(
            PFseries.dat,
            std.zaba[, c( 'age', 'Yx_std' ) ],
            by.x = 'age.mid',
            by.y = 'age'
          )

        PFseries.dat$Fx <-
          round( F.level * exp( - exp( - ( coeffs.Gomp$F.beta * PFseries.dat$Yx_std + coeffs.Gomp$F.intercept ) ) ), 4 )

        PFseries.dat$PF.Ratio <-
          round( PFseries.dat$P / PFseries.dat$Fx, 4 )

        outpf.dat <-
          PFseries.dat[, c('age.group', 'age.mid', 'P', 'Fx', 'PF.Ratio')]

        return(outpf.dat)
      }

    if ( !is.null( P ) ){
      PFseries.dat <-
        PFseries.Calc(
          age.group = inputGomp.dat$age.group,
          age.lb    = inputGomp.dat$age.lb,
          age.ub    = inputGomp.dat$age.ub,
          P.obs     = inputGomp.dat$P,
          F.level   = unique( AntiGompit.dat$F.level ),
          coeffs.Gomp = coeffGomp.dat$coeffs.Gomp
        )
    } else {
      PFseries.dat <- NULL
    }

    # 7. Create output data.frames

    ## 7.1 adjusted asfr
    asfr <-
      data.frame(
        age.group    = inputGomp.dat$age.group,
        asfr.obs     = inputGomp.dat$asfr,
        asfr.adj     = AntiGompit.dat$fmx.noshift
      )

    ## 7.2 total fertility rate
    tfr <-
      data.frame(
        TFR.obs  = round( sum( asfr$asfr.obs * 5, na.rm = T ), 4 ),
        TFR.adj  = round( sum( asfr$asfr.adj * 5, na.rm = T  ), 4 )
      )

    ## 7.3 adjusted parameters alpha and beta
    paramsReg <-
      coeffGomp.dat$coeffs.Gomp

    ## 7.4 regression variables
    varsReg <-
      coeffGomp.dat$fitGomp.reg

    ## 7.5 PF series
    pfSeries <-
      PFseries.dat

    fertGompPF.out <-
      list(
        asfr      = asfr,
        tfr       = tfr,
        paramsReg = paramsReg,
        varsReg   = varsReg,
        pfSeries  = pfSeries
      )

    return( fertGompPF.out )  }
josehcms/fertestr documentation built on Oct. 9, 2024, 9:03 p.m.