R/table_maker.r

Defines functions table_maker

table_maker <- function(DAT, year=2020, category_type = NULL, LBT_data=NULL, strata = "FADN_2018", mark_low_vals = TRUE, calculate="none", BIN = TRUE, year_of_interest_for_approaching=2024, required_benefit_as_fraction=0.05) {
 
    # DAT                 = The name of main dataset. This is usually BIN data.

    # year                = Default is 2020             - The year of the data, or NULL for all data. For now the BIN and LBT year are the same.

    # category_type       = Default is NULL             - Can be region, soil type or something else

    # LBT_data            = Data to replace empty BIN cells with or calculate fractions. This is usually LBT data.    

    # strata              = OPTIONS:
    #                       strata == "calculate"       -  Optimises strata based on year(s) selected
    #                       strata == "all"             -  Uses all SO boundaries as strata for each type
    #                       strata == "FADN_2018"       -  Uses the strata from the 2018 FADN Report
    #                       strata == NULL              -  Uses the strata from the 2018 FADN Report
    #                       Alternatively, a list with custom strata could be used (in the form of strata_in below)  

    # mark_low_vals       = TRUE/FALSE                  -  Whether to mark cells with few observations with colours    

    # calculate           = OPTIONS:
    #                       calculate == "fractions"                 -  Calculates Sampling fractions according to the Census by stratum
    #                       calculate == "variance"                  -  Uses all SO boundaries as strata for each type
    #                       calculate == "relative_svc_deviation"    -  Uses the strata from the 2018 FADN Report

    # BIN                 = TRUE/FALSE                  -  Whether DAT is BIN data

    # year_of_interest_for_approaching  = Default is 2024            - The year that you want to approach new firms for BIN

    # required_benefit_as_fraction      = Default is 0.05            - The required benefit to add another strata

    ### List of packages 
    packages <- c(
        "Rcpp",
        "officer",
        "devtools",
        "readxl",
        "writexl",
        "fuzzyjoin", 
        "htmltools", 
        "purrr", 
        "flextable", 
        "stringr",
        "stringi",  
        "magrittr",
        "Hmisc", 
        "tidyr",
        "dplyr",
        "data.table"
    )

    # This function only installs packages if they are not already installed and installs all libraries.
    x <- lapply(packages, function(packages) {if (!require(packages, character.only = TRUE, quietly = TRUE, warn.conflicts=FALSE)) {library(packages, character.only = T, quietly = TRUE, warn.conflicts=FALSE)}})

    ### Custom functions #################################################################################################################################################

    # Changes base R dput to deal with data.table
    dput = function(x, ...) { if(is.data.table(x)) { setattr(x, '.internal.selfref', NULL) }; base::dput(x, ...) }

    # Checks if all are NA or not
    is_all_na <- function(x)all(is.na(x))

    # Checks whether
    is.consec <- function(x1, x2, vec) {
        x1 %in% vec & x2 %in% vec & abs(diff(match(c(x1, x2), vec))) == 1L
    }

    # C++ code for strata intervals (this was a major bottleneck speed wise)
    Rcpp::cppFunction(
        "std::vector<std::string> interval(std::vector<int> &x,
                                            std::vector<std::vector<int>> &y){
            std::vector<std::string> z;
            z.reserve(x.size());
            std::transform(x.begin(), x.end(), y.begin(), std::back_inserter(z), 
            [&](int a, std::vector<int> b) {
                auto it = std::find_if(b.begin(), b.end(), [=](int w) {return a < w;});
                return it == b.end()? \"NA\":
                    '[' + std::to_string(*(it-1)) + ',' + std::to_string(*it) + ')';
            });
            return z;
        }"
    )

    # Puts columns string names in correct order
    # https://stackoverflow.com/questions/72141429/converting-column-names-so-they-can-be-put-in-an-numerical-order/72141622?noredirect=1#comment127466664_72141622
    library(magrittr)
    order_cols <- function(dat) {
        
        # look for words to order by
        s_ordered <- stringi::stri_extract_all_regex(colnames(dat), "[[:alpha:]]+") %>% 
            unlist() %>% 
            unique() %>% 
            sort()
        
        if (length(s_ordered) > 1) {
            # replace words with their alphabetical index
            cnames <- stringi::stri_replace_all_fixed(colnames(dat), s_ordered, seq_along(s_ordered), vectorise_all = FALSE)
        } else {
            cnames <- colnames(dat)
        }
        
        cnames %>% 
            stringi::stri_extract_all_regex("\\d+") %>% # extract all numbers (including the alphabetical index numbers)
            lapply(as.numeric) %>% 
            do.call(rbind, .) %>%    # bind list items to a matrix
            as.data.frame %>%        # change the matrix to a data.frame (i.e. a list)
            do.call(order, .)        # use the list for ordering
    }

            my_color_fun <- function(x) {
                if (calculate=="relative_svc_deviation") {
                    out <- rep("white", length(x))  
                    idx <- suppressWarnings(as.numeric(x) < 25 | as.numeric(x) > -25)
                    out[idx] <- 'green'
                    idx <- suppressWarnings(as.numeric(x) > 50 | as.numeric(x) < -50)
                    out[idx] <- 'orange'
                    idx <- suppressWarnings(as.numeric(x) > 100 | as.numeric(x) < -100)
                    out[idx] <- 'pink'
                    idx <- suppressWarnings(is.na(as.numeric(x)))
                    out[idx] <- 'red'
                    idx <- suppressWarnings(as.numeric(x) == 0)
                    out[idx] <- 'red'                    
                    out
                } else if (calculate=="variance") {
                    out <- rep("white", length(x))  
                    idx <- suppressWarnings(as.numeric(x) <= 10)
                    out[idx] <- 'yellow'
                    idx <- suppressWarnings(as.numeric(x) <= 5)
                    out[idx] <- 'orange'
                    idx <- suppressWarnings(as.numeric(x) <= 3)
                    out[idx] <- 'pink'
                    idx <- suppressWarnings(is.na(as.numeric(x)))
                    out[idx] <- 'red'
                    out
                } else if (is.na(categories_vec[1]) && calculate == "fractions") {
                    out <- rep("white", length(x))
                    idx <- suppressWarnings(as.numeric(x) == 0)
                    out[idx] <- 'red'
                    idx <- suppressWarnings(as.numeric(x) > 0.1)
                    out[idx] <- 'orange'
                    out
                } else if (is.na(categories_vec[1])) {
                    out <- rep("white", length(x))  
                    idx <- suppressWarnings(as.numeric(x) <= 10)
                    out[idx] <- 'yellow'
                    idx <- suppressWarnings(as.numeric(x) <= 5)
                    out[idx] <- 'orange'
                    idx <- suppressWarnings(as.numeric(x) <= 3)
                    out[idx] <- 'pink'
                    idx <- suppressWarnings(is.na(as.numeric(x)))
                    out[idx] <- 'red'
                    out
                } else {
                    out <- rep("white", length(x))  
                    idx <- suppressWarnings(as.numeric(x) <= 2)
                    out[idx] <- 'yellow'
                    idx <- suppressWarnings(as.numeric(x) <= 1)
                    out[idx] <- 'orange'
                    idx <- suppressWarnings(as.numeric(x) <= 0)
                    out[idx] <- 'pink'
                    idx <- suppressWarnings(is.na(as.numeric(x)))
                    out[idx] <- 'red'                    
                    out
                }
            }

            my_color_total_fun <- function(x) {
                if (!is.na(categories_vec[1]) && calculate=="none") {
                    out <- rep("white", length(x))  
                    idx <- suppressWarnings(as.numeric(x) <= 8)
                    out[idx] <- 'yellow'
                    idx <- suppressWarnings(as.numeric(x) <= 5)
                    out[idx] <- 'orange'
                    idx <- suppressWarnings(as.numeric(x) <= 1)
                    out[idx] <- 'pink'
                    idx <- suppressWarnings(as.numeric(x) <= 0)
                    out[idx] <- 'red'
                    idx <- suppressWarnings(is.na(as.numeric(x)))
                    out[idx] <- 'red'                    
                    out
                }
            }

    ### Settings ######################################################################################################################################

    required_benefit <- required_benefit_as_fraction

    ### Calculate Strata ####################################################################################################################################################

    # Use strata from FADN 2018 if data is BIN data (default = TRUE)
    if (isTRUE(BIN)) {
        
        # English Names and Strata if no strata provided
        strata_in <- list(
            "Starch potatoes" = c(0, 25, 100, 250, 500, 1000000),
            "Organic crops" = c(0, 25, 100, 250, 500, 1000000),
            "Other field crops (non-organic)" = c(0, 25, 100, 250, 500,1000, 1000000),
            "Vegetables under glass" = c(0, 25, 100, 500, 1000, 1500, 3000, 1000000),
            "Plants under glass" = c(0, 25, 100, 500, 1000, 1500, 3000, 1000000),
            "Flowers under glass" = c(0, 25, 100, 500, 1000, 1500, 3000, 1000000),
            "Field vegetables" = c(0, 25, 100, 500, 1000, 1000000),
            "Fruit" = c(0, 25, 100, 250, 500, 1000000),
            "Tree nursery" = c(0, 25, 100, 500, 1000, 1000000),
            "Flower bulbs" = c(0, 25, 100, 500, 1000, 1000000),
            "Other horticulture" = c(0, 25, 100, 500, 1000, 1000000),
            "Dairy (organic)" = c(0, 25, 100, 250, 500, 1000000),
            "Dairy (non-organic)" = c(0, 25, 100, 250, 500,1000, 1000000),
            "Calf fattening" = c(0, 25, 100, 500, 1000, 1000000),
            "Goats" = c(0, 25, 100, 250, 500,1000, 1000000),
            "Other grazing livestock" = c(0, 25, 50,100, 250, 500, 1000000),
            "Pig rearing" = c(0, 25, 100, 250, 500,1000, 1000000),
            "Pig fattening" = c(0, 25, 100, 250, 500,1000, 1000000),
            "Combined pig rearing and fattening" = c(0, 25, 100, 250, 500,1000, 1000000),
            "Eggs for consumption" = c(0, 25, 100, 500, 1000, 1000000),
            "Broilers" = c(0, 25, 100, 500, 1000, 1000000),
            "Other intensive livestock"= c(0, 25, 100, 500, 1000, 1000000),
            "Combined" = c(0, 25, 100, 250, 500,1000000)
        )

        # Saved the order of the NSO types in the same order they are shown in the 2018 FADN report
        strat_order <- names(strata_in)

        if (is.null(strata) || strata == "FADN_2018") {

            print("Strata of FADN 2018 are used")

        } else if (strata=="all") {

            # If strata=all all possible brackets are used for each type
            for (i in seq_along(strata_in)) {
                    strata_in[[i]] <- c(0, 25, 50, 100, 250, 500, 1000, 1500, 3000, 1000000)
                }

            print("strata = all >>> All strata instead of FADN strata will be used.")

        } else if (strata == "calculate") {
            
            # Actual calculations are done further down
            print("strata = calculate >>> All strata are optimised with respect to the variance.")

        }
    }


    if (isTRUE(BIN) && isTRUE(strata == "calculate")) {

        # The sample size as they are used in the 2018 FADN report
        # The question arises whether setting these sample size before is defendable. This might be the case if other things than SO are relevant.

        print("Please note that the samples sizes are not optimised, but taken from the 2018 FADN report")

        sample_size_by_NSOtype <- list(
                "Starch potatoes" = 30,
                "Organic crops" = 30,
                "Other field crops (non-organic)" = 150,
                "Vegetables under glass" = 130,
                "Plants under glass" = 65,
                "Flowers under glass" = 118,
                "Field vegetables" = 55,
                "Fruit" = 38,
                "Tree nursery" = 70,
                "Flower bulbs" = 37,
                "Other horticulture" = 45,
                "Dairy (organic)" = 30,
                "Dairy (non-organic)" = 300,
                "Calf fattening" = 40,
                "Goats" = 30,
                "Other grazing livestock" = 33,
                "Pig rearing" = 48,
                "Pig fattening" = 48,
                "Combined pig rearing and fattening" = 38,
                "Eggs for consumption" = 30,
                "Broilers" = 30,
                "Other intensive livestock"= 30,
                "Combined" = 75
            )

    }

    # DAT ####################################################################################################################################################

    # Select variables necessary from data
    var_subset <- c("SO","ENG_name", "jaar", "NTW", "SVC")

    # If there are is category_type, add that category type (i.e. region)
    if (!is.null(category_type)) {

        var_subset <- append(var_subset, category_type)

    }

    # Turn data into data.table format and subset variables needed
    DAT <- data.table(DAT)
    DAT <- DAT[,..var_subset]


    # Subset with year if given. When year is NULL, all years are used
    if (!is.null(year)) {
        DAT <- DAT[jaar==year]  
        print(paste0("Showing results for ", year))
    }
    
    # LBT ####################################################################################################################################################
    
    if (!is.null(LBT_data)){

        # Select variables necessary from data
        var_subset <- c("SO","ENG_name", "jaar", "year_available_after_nonresp")

        # If there are is category_type, add that category type (i.e. region)
        if (!is.null(category_type)) {
            var_subset <- append(var_subset, category_type)
        }

        # Turn data into data.table format and subset variables needed
        LBT <- data.table(LBT_data)
        LBT <- LBT[,..var_subset]

        # Subset with year if given. When year is NULL, all years are used
        if (!is.null(year)) {
            LBT <- LBT[jaar==year]  
        }

        if (!is.null(year_of_interest_for_approaching)) {

            # Include only approachable farms in dat_lbt (have not been approach yet or can be approached again)
            LBT <- setDT(LBT)[is.na(year_available_after_nonresp) | year_available_after_nonresp < year_of_interest_for_approaching,]
            print(paste0("The year in which to look for new BIN candidates is set, only firms that can be approached in ", year_of_interest_for_approaching, " are included"))

        }
    }


    if ( isTRUE(strata == "calculate") ) {

        # remove observations with SO values below 25000, these are not used to calculate the strata but will be added later

        lbt_over_25 <- LBT[SO>25]

        print("Optimal stata are being calculated")

        if ( is.null(category_type) ){

            # When there is no category type, use order of 2018 FADN report
            NSO_types <- strat_order

        } else if (!is.null(category_type)) {

            NSO_names <- strat_order
            categories_vec <- unique(DAT[, ..category_type])
            categories_vec <- unlist(as.vector(categories_vec))
            NSO_types <- paste0( paste0(rep(NSO_names, length(categories_vec)) ), paste0( rep(categories_vec, (length(NSO_names)*length(categories_vec)) )))

        }
        
        strata_in <- list()
        optimal_sample_sizes_by_SOcat <- list()
        
        print(paste0("Required variance reduction is ", required_benefit*100, "%"))
        
        for (i in 1:length(NSO_types)) {
            max_sample_size = sample_size_by_NSOtype[[i]]
            all_strata <- c(0, 25, 50, 100, 250, 500, 1000, 1500, 3000, 1000000)
            # Filter SO by NSO type
            lbt_by_cat_over_25 <- filter(lbt_over_25, ENG_name == NSO_types[i])  

            print("                                             ")
            print("                                             ")
            print(NSO_types[i])            
            print(paste0("Maximum sample size is ", max_sample_size))
            print("                                             ")
            print("                                             ")            

            strata.bh.x <- lbt_by_cat_over_25$SO

            # Local variables by NSO type
            starting_variance <- var(strata.bh.x)
            # THIS SHOULD BE CHECKED
            strata_vals <- c(25, 1000000)
            list_of_results <- list()
            list_of_variances <- list()
            benefit <- 1 # This makes sure that the while function runs the first time

            # Keeps trying to add more strata as long as the benefit is large enough
            while( benefit >= required_benefit ) {

                # Makes a list of variances for each added stratum
                for (j in seq_along(all_strata)){
                    # Adds a strata from left over strata
                    bh <- all_strata[j]
                    # Adds it to the already selected strata
                    bh <- append(bh, strata_vals)
                    bh <- unique(bh)   
                    bh <- sort(bh)
                    # print(bh)
                    list_of_results[[j]] <- strata.bh(x=strata.bh.x, bh=bh, n = max_sample_size, CV = NULL, Ls = length(bh)+1, certain = NULL,
                    takenone = 0)
                    list_of_variances[[j]] <- list_of_results[[j]]$RRMSE
                }

                ### BENEFIT ####################################################################################################################

                # Compares best new variance with old variance
                if (!exists("old_var_value")) {
                    old_var_value <- starting_variance
                }

                # This makes it choose the best choice every time
                new_var_value <- list_of_variances[which.min(replace(list_of_variances, list_of_variances<=0, NA))][[1]]

                # Calculates the benefit of the best choice
                benefit <- (old_var_value-new_var_value)/old_var_value

                ###############################################################################################################################

                # Continuous if new variance is 5% better than old variance
                if (benefit >= required_benefit) {

                    # Selects the border value belonging to the best choice and adds it to optimal set
                    strata_vals <- append(strata_vals, all_strata[which.min(replace(list_of_variances, list_of_variances<=0, NA))][1])
                    strata_vals <- unique(strata_vals)
                    strata_vals <- sort(strata_vals)

                    # If there was a benefit the new value becomes the old value
                    old_var_value <- new_var_value

                    # Remove the chosen strata from the list of options
                    all_strata = all_strata[!(all_strata %in% strata_vals)]
                    all_strata <- sort(all_strata)

                    # This saves the strata when there is still a benefit. It is no longer overwritten when there is no longer a benefit.
                    bh_star <- strata_vals

                } else {

                    list_of_results[[j]] <- strata.bh(x=strata.bh.x, bh=bh_star, n = max_sample_size, CV = NULL, Ls = length(bh_star)+1, certain = NULL,
                    takenone = 0)

                    print(list_of_results[[j]])
                    
                    rm(old_var_value)
                    strata_vals <- sort(strata_vals)
                    sample_sizes_by_SOcat <- list_of_results[[j]]$nh

                }

            }

            # When the benefit is too small put all strata into a list.
            optimal_sample_sizes_by_SOcat[[i]] <- sample_sizes_by_SOcat
            strata_in[[i]] <- strata_vals   

        }

        strata_in[] <- lapply(strata_in, function(i)unique(c(0, 25, i, 1000000)))

        # Add NSO names to list
        names(strata_in) <- NSO_types
        names(optimal_sample_sizes_by_SOcat) <- NSO_types

    }  
    
    if ( isTRUE(strata == "calculate") ) {

        # These are the optimal numbers for BIN
        optimal_sample_sizes_by_SOcat <- data.frame(matrix(optimal_sample_sizes_by_SOcat, byrow = TRUE))
        optimal_sample_sizes_by_SOcat$rn <- names(strata_in)
        names(optimal_sample_sizes_by_SOcat)[1] <- "optimal_observations"
        optimal_sample_sizes_by_SOcat <- data.table(optimal_sample_sizes_by_SOcat)

    }

    # These are the strata coinciding with the optimal numbers for BIN
    strata_df <- data.frame(matrix(strata_in, byrow = TRUE))
    strata_df$rn <- names(strata_in)
    names(strata_df)[1] <- "strata_column"
    strata_df <- data.table(strata_df)

    DAT <- data.table(DAT)
    DAT <- merge(DAT, strata_df, all.x=TRUE, by.x="ENG_name", by.y="rn", sort=TRUE)

    if ( isTRUE(strata == "calculate") ) {
        DAT <- merge(DAT, optimal_sample_sizes_by_SOcat, all.x=TRUE, by.x="ENG_name", by.y="rn", sort=TRUE)
    }

    # Strata need to be decide based on the total population including observations that cannot be approached.
    # Because of this it is possible t

    # Create intervals
    DAT[, SO_cat:=interval(SO, strata_column)]
    # Remove white spaace
    DAT$SO_cat <- gsub('\\s+', '', DAT$SO_cat)

    # Current Strata
    DAT$SO_cat <- as.character(DAT$SO_cat)
    # # Current Strata by type
    DAT$SO_cat_type <- with(DAT, paste0(SO_cat, " ",ENG_name))

    # Amount of observations available in BIN per NSO type

    print("BIN data loaded")

    # LBT
    if (!is.null(LBT_data)){
        if (!is.null(year_of_interest_for_approaching)) {
            LBT <- setDT(LBT)[is.na(year_available_after_nonresp) | year_available_after_nonresp < year_of_interest_for_approaching,]
        }
        LBT <- merge(LBT, strata_df, all.x=TRUE, by.x="ENG_name", by.y="rn", sort=TRUE)

        LBT[, SO_cat:=interval(SO, strata_column)]

        # Remove white spaace
        LBT$SO_cat <- gsub('\\s+', '', LBT$SO_cat)

        # Current Strata
        LBT$SO_cat <- as.character(LBT$SO_cat)

        # # Current Strata by type
        LBT$SO_cat_type <- with(LBT, paste0(SO_cat," " ,ENG_name))
        print("LBT data loaded")

    }



    if (!is.null(category_type)) {
        categories_vec <- unique(DAT[, ..category_type])
        categories_vec <- unlist(as.vector(categories_vec))
        if (!is.null(LBT_data)){
        # CHANGE
            LBT$SO_cat_reg <- with(LBT, paste0(SO_cat," " , get(category_type)))
            DAT$SO_cat_reg <- with(DAT, paste0(SO_cat, " ", get(category_type)))
            print("Categories = TRUE")
        }
    }

    if (calculate=="variance" && is.null(category_type)) {
        LBT[, `:=` (var_SO = round( var(SO) ,0)), by = SO_cat_type]
        print("Variance calculated")
    } else if (calculate=="variance" && !is.null(category_type)) {
        LBT[, `:=` (var_SO = round(var(SO),0)), by = SO_cat_reg]
        print("Variance calculated")     
    } else if (calculate=="relative_svc_deviation" && !is.null(category_type)) {
        print("2a")
        # DOES NOT WORK
        rel_deviation_by_categories <- paste0("rel_deviation_by_", category_type)
        DAT <- data.table(DAT)
        out_rel_deviation_by_categories <<- rel_deviation_by_categories
        DAT[, (rel_deviation_by_categories) :=  100 * mean( (SVC - NTW) / NTW , na.rm=TRUE), by=c("SO_cat_type", category_type)]   
        if ( length(year) > 1 ) {
            rel_deviation_by_categories <- paste0("rel_deviation_", category_type, "by_year" )
            DAT[, (rel_deviation_by_categories_by_year) := 100 * mean( ( SVC - NTW ) / NTW, na.rm=TRUE), by=c("SO_cat_type", category_type, if(year){"jaar"})]
        }
        print("Relative SVC deviations calculated, by ")
        print("Please note that SVC deviations should be calculated on all years, so 'years' should be set to NULL")
    } else if (calculate=="relative_svc_deviation" && is.null(category_type)) {
        # DOES NOT WORK
        DAT <- data.table(DAT)      
        DAT[, relative_deviation :=  100 * mean( (SVC - NTW) / NTW , na.rm=TRUE), by=c("SO_cat_type")]    
        if ( length(year) > 1 ) {
            rel_deviation_by_year <- paste0("rel_deviation_", "by_year" )
            DAT[, (rel_deviation_by_year) := 100 * mean( ( SVC - NTW ) / NTW, na.rm=TRUE), by=c("SO_cat_type", if(year){"jaar"})]
        }
        print("Relative SVC deviations calculated")
        print("Please note that SVC deviations should be calculated on all years, so 'years' should be set to NULL")
    }

    if (is.null(category_type) && calculate == "none" && !is.null(LBT_data)) {
        table_lbt_input <- table(LBT$ENG_name, LBT$SO_cat)
        print("LBT table created")
        table_bin_input <- table(DAT$ENG_name, DAT$SO_cat)
        print("BIN table created")             
    } else if (!is.null(category_type) && calculate == "none" && !is.null(LBT_data)) {
        table_lbt_input <- table(LBT$ENG_name, LBT$SO_cat_reg)
        print("LBT table created")
        table_bin_input <- table(DAT$ENG_name, DAT$SO_cat_reg)
        print("BIN table created")        
    } else if (is.null(category_type) && calculate=="variance") {
        table_lbt_input <- xtabs(var_SO~ENG_name+SO_cat, LBT)
        print("Variance table created")
    } else if (!is.null(category_type) && calculate=="variance") {
        table_lbt_input <- xtabs(var_SO~ENG_name+SO_cat_reg, LBT)
        print("Variance table created")
    } else if (is.null(category_type) && calculate=="relative_svc_deviation") {
        # xtabs(as.formula(paste(mean_val_by_var, "~", by_var)), dat)
        table_bin_input <- xtabs(as.formula(paste("relative_deviation", "~", "ENG_name", "+", "SO_cat")), DAT)
        table_bin_input <- round(table_bin_input, 3)     
        BIN_observations <- table(DAT$ENG_name, DAT$SO_cat)
        print("Relative SVC deviations table created")
    } else if (!is.null(category_type) && calculate=="relative_svc_deviation") {
        table_bin_input <- xtabs(as.formula(paste("rel_deviation_by_categories", "~", "ENG_name", "+", "SO_cat_reg")), DAT)
        table_bin_input <- round(table_bin_input, 3)
        print(table_bin_input)
        BIN_observations <- table(DAT$ENG_name, DAT$SO_cat)
        print("Relative SVC deviations table created")
    } else if (calculate == "fractions") {
        table_bin_input <- table(DAT$ENG_name, DAT$SO_cat)
        print("BIN table created")
        table_lbt_input <- table(LBT$ENG_name, LBT$SO_cat)
        print("LBT table created")
    }
    
    if (exists("table_bin_input")) {
        # Row sums stored before changes are made to the table
        index_order <- match(strat_order, row.names(table_bin_input))
        table_bin_input <- table_bin_input[index_order,]
    }

    if (is.null(category_type) && calculate=="none") {
        row_sums_stored_from_only_bin <- rowSums(table_bin_input, na.rm=TRUE)
    } else if (!is.null(category_type) && calculate=="none") {
        list_of_row_sums_stored_from_only_bin <- list()
        tibble_format <- as.data.frame.matrix(table_bin_input)
        categories_vec <- sort(categories_vec)
        for (i in seq_along(categories_vec)) {

            tibble_format_temp <- tibble_format %>% select(ends_with(as.vector(unlist(categories_vec[[i]]))))
            list_of_row_sums_stored_from_only_bin[[i]] <- rowSums(tibble_format_temp, na.rm=TRUE)  
                 
        }
        names(list_of_row_sums_stored_from_only_bin) <- categories_vec
        list_of_row_sums_stored_from_only_bin = list_of_row_sums_stored_from_only_bin[order(names(list_of_row_sums_stored_from_only_bin))]
        print(names(list_of_row_sums_stored_from_only_bin))
    } else if (is.null(category_type) && calculate=="relative_svc_deviation") {
        # Weights have to be taken from BIN
        table_bin_input_temp <- as.data.frame.matrix(table_bin_input)
        BIN_observations <- as.data.frame.matrix(BIN_observations)
        # BIN observations are in wrong order
        index_order <- match(strat_order, row.names(BIN_observations))
        BIN_observations <- BIN_observations[index_order,]

        row_sums_stored_from_only_bin <- apply(table_bin_input_temp, 1, function(x) weighted.mean(x, BIN_observations, na.rm=TRUE))
        row_sums_stored_from_only_bin <- round(row_sums_stored_from_only_bin, 3)
    }
    

    print("Input tables created")

    combination_table <- function(table_bin_input, table_lbt_input) {

        # get all names from DAT en LBT
        names_bin_table <- colnames(table_bin_input)
        names_lbt_table <- colnames(table_lbt_input)

        # add columns (with zeroes) which are not present in DAT from LBT data
        add_to_bin_names <- setdiff(names_lbt_table, names_bin_table)
        out_add_to_bin_names <<- add_to_bin_names

        if (length(add_to_bin_names)>1) {
            add_to_bin <- table_lbt_input[,add_to_bin_names]
            add_to_bin[add_to_bin > 0] <- 0 
            BIN_II <- cbind(table_bin_input, add_to_bin)
            BIN_II <- BIN_II[,names_lbt_table]
        } else if (length(add_to_bin_names)==1) {
            print("1 column added")
            add_to_bin <- table_lbt_input[,add_to_bin_names]
            # Because there is only one column:
            add_to_bin <- data.table(add_to_bin)            
            colnames(add_to_bin) <- add_to_bin_names
            add_to_bin[add_to_bin > 0] <- 0 
            BIN_II <- cbind(as.data.frame.matrix(table_bin_input), add_to_bin)
            # BIN_II <- data.table(BIN_II, keep.rownames=TRUE)
            BIN_II <- BIN_II[,names_lbt_table]
        } else {
            BIN_II <- copy(table_bin_input)
        }
        
        # Put BIN_II strata columns right order
        replacement_order <- order_cols(BIN_II)
        BIN_II <- BIN_II[,replacement_order]

        # Put BIN NSO rows right order
        index_order <- match(strat_order, row.names(BIN_II))
        BIN_II <- BIN_II[index_order,]

        # Put table_bin_input strata columns right order
        replacement_order <- order_cols(table_bin_input)
        table_bin_input <- table_bin_input[,replacement_order]
        # Put table_bin_input NSO rows right order
        index_order <- match(strat_order, row.names(table_bin_input))
        table_bin_input <- table_bin_input[index_order,]
        # Put table_lbt_input strata columns right order
        replacement_order <- order_cols(table_lbt_input)
        table_lbt_input <- table_lbt_input[,replacement_order]
        # Put table_lbt_input NSO rows right order
        index_order <- match(strat_order, row.names(table_lbt_input))
        table_lbt_input <- table_lbt_input[index_order,]

        # Table mutations
        if (isTRUE(strata=="calculate")) {

            # Create a table using LBT table. These values are necessarily bigger than zero because otherwise no observations could be selected.
            table_for_optimal_N <- table_lbt_input

            # Put strata columns right order
            replacement_order <- order_cols(table_for_optimal_N)
            table_for_optimal_N <- table_for_optimal_N[,replacement_order]

            # Put NSO rows right order
            index_order <- match(strat_order, row.names(table_for_optimal_N))
            table_for_optimal_N <- table_for_optimal_N[index_order,]

            # Replace values
            tab <- t(table_for_optimal_N)

            # Exclude the first row
            tab <- tab[-1,]

            # Remove zero values
            positive_nr_vec <- lapply(optimal_sample_sizes_by_SOcat$optimal_observations, function(x) {x[x!=0]})

            tab[tab > 0] <- unlist(positive_nr_vec)

            tab <- t(tab)

            tab <- cbind(table_bin_input[,1],tab)

            # Put table_bin_input strata columns right order
            replacement_order <- order_cols(table_bin_input)
            table_bin_input <- table_bin_input[,replacement_order]
            # Put table_bin_input NSO rows right order
            index_order <- match(strat_order, row.names(table_bin_input))
            table_bin_input <- table_bin_input[index_order,]
            # Put table_lbt_input strata columns right order
            replacement_order <- order_cols(table_lbt_input)
            table_lbt_input <- table_lbt_input[,replacement_order]
            # Put table_lbt_input NSO rows right order
            index_order <- match(strat_order, row.names(table_lbt_input))
            table_lbt_input <- table_lbt_input[index_order,]
            # Put tab strata columns right order
            replacement_order <- order_cols(tab)
            tab <- tab[,replacement_order]
            # Put tab NSO rows right order
            index_order <- match(strat_order, row.names(tab))
            tab <- tab[index_order,]

            # Subtract optimal observations from available BIN observations, excluding[0,25]. This calculates observations surplus.
            BIN_II <- table_bin_input[,2:ncol(BIN_II)] - tab[,2:ncol(tab)]
            # Add back [0,25]
            BIN_II <- cbind(table_bin_input[,1], BIN_II)
            colnames(BIN_II)[1] <- "[0,25)"

            # out_BIN_II has the negative values needed further down below
            out_BIN_II <<- BIN_II
            # Replace values if surplus is negative (shortage)
            i2 <- BIN_II < 0 & table_lbt_input >= 0
            BIN_II[i2] <- table_lbt_input[i2]            
            # DON'T CHANGE NEXT LINE
            bin_replacements_index_table <<- i2 # Otherwise value is not returned
            
        } else if ( isTRUE(calculate == "none")) {
            i1 <- BIN_II == 0 & table_lbt_input >= 0
            BIN_II[i1] <- table_lbt_input[i1]            
            # DON'T CHANGE NEXT LINE
            bin_replacements_index_table <<- i1 # Otherwise value is not returned
        } else if (isTRUE(calculate == "fractions")) {
            out_table_bin_input <<- table_bin_input
            out_table_lbt_input <<- table_lbt_input
            BIN_II <- table_bin_input/table_lbt_input
            BIN_II <- round(BIN_II, 3)
        }
        
        # Put BIN_II strata columns right order
        replacement_order <- order_cols(BIN_II)
        BIN_II <- BIN_II[,replacement_order]

        # Put BIN NSO rows right order
        index_order <- match(strat_order, row.names(BIN_II))
        BIN_II <- BIN_II[index_order,]  
        return(BIN_II)
    }


    if (!is.null(LBT_data) && calculate == "none"){

        table_in <- combination_table(table_bin_input, table_lbt_input)
        # Put table_in strata columns right order
        replacement_order <- order_cols(table_in)
        table_in <- table_in[,replacement_order]

        # Put table_in NSO rows right order
        index_order <- match(strat_order, row.names(table_in))
        table_in <- table_in[index_order,]
        print("Replaced empty BIN cells with LBT data")

    } else if (is.null(LBT_data) && calculate == "none") {
        
        table_in <- table_bin_input

    } else if (!is.null(LBT_data) && calculate=="variance"){
        
        print("Showing variance of LBT data")
        table_in <- table_lbt_input

    } else if (calculate=="relative_svc_deviation") {

        table_in <- table_bin_input
        print("Showing Relative SVC deviations of BIN data")

    } else if (calculate=="fractions") {

        table_in <- combination_table(table_bin_input, table_lbt_input)
        print("Showing BIN as a fraction of LBT data")

    }

    # Deals with NA as a row name
    table_in <- table_in[!is.na(row.names(table_in)),]
    if (exists("bin_replacements_index_table") && length(out_add_to_bin_names)>1 ) {
        bin_replacements_index_table <- bin_replacements_index_table[!is.na(row.names(bin_replacements_index_table)),]
    }

    # Put table_in strata columns right order
    replacement_order <- order_cols(table_in)
    table_in <- table_in[,replacement_order]

    # Put table_in NSO rows right order
    index_order <- match(strat_order, row.names(table_in))
    table_in <- table_in[index_order,]


    # Deals with NA as a column name
    for (i in seq_along(colnames(table_in))){
        if (is.na(colnames(table_in)[i])) {
            colnames(table_in)[i] <- "Missing"
            table_in <- table_in[complete.cases(table_in), ]
            if (exists("bin_replacements_index_table")) {
                colnames(bin_replacements_index_table)[i] <- "Missing"
            }
        }
    }    

    # Put table_in strata columns right order
    replacement_order <- order_cols(table_in)
    table_in <- table_in[,replacement_order]

    # Put table_in NSO rows right order
    index_order <- match(strat_order, row.names(table_in))
    table_in <- table_in[index_order,]

    table_in <- as.data.frame.matrix(table_in)

    # Install necessary packages

    names(table_in)[duplicated(names(table_in))[]]

    colnames(table_in) = gsub(pattern = "NA*", replacement = "None", x = colnames(table_in))
    colnames(table_in) = iconv(colnames(table_in), to='ASCII//TRANSLIT') 
    colnames(table_in) = str_to_title(colnames(table_in)) # ; names(table_in)

    if (exists("bin_replacements_index_table")) {
        colnames(bin_replacements_index_table) = gsub(pattern = "NA*", replacement = "None", x = colnames(bin_replacements_index_table))
        colnames(bin_replacements_index_table) = iconv(colnames(bin_replacements_index_table), to='ASCII//TRANSLIT') 
        colnames(bin_replacements_index_table) = str_to_title(colnames(bin_replacements_index_table)) # ; names(table_in)
    }

    setnames(table_in, "Missinoneg", "Missing", skip_absent=TRUE) # Because of LMM data

    ###################################################################################################################################################
    ###################################################################################################################################################
    ###################################################################################################################################################

    if (!is.na(category_type) && ncol(table_in) < 8) {
        print("Less than 8 columns in table_in, assuming that the only categories in the data are region or soil-type")

        categories_vec <- colnames(table_in)
        table_in$Total <- rowSums(table_in, na.rm=TRUE)

        table_in <- setDT(table_in, keep.rownames = TRUE)[]

        if(isTRUE(BIN)){
            
            table_in <- table_in %>%
                slice(match(strat_order, rn))

            names(table_in)[1] <- "Category"
        }

        if (!is.null(LBT_data) && isFALSE(calculate == "fractions")) {
            print("Replacing BIN cells with zero observations with data from LBT")
            # Replace the empty BIN cells with LBT
            for (i in 1:(length(categories_vec))) {
                table_in[[i+1]][as.vector(unlist(bin_replacements_index_table[,i]))] = paste0(table_in[[i+1]][as.vector(unlist(bin_replacements_index_table[,i]))], " (in LBT)")        
            }
        }

        flextable_out <- flextable(table_in)

        flextable_out <- align(flextable_out, align = "center", part = "all")
        flextable_out <- bold(flextable_out, i = c(1), bold = TRUE, part = "head")
        flextable_out <- bold(flextable_out, j = c(1), bold = TRUE, part = "body")     

        my_color_fun <- function(x) {
            out <- rep("white", length(x))
            idx <- suppressWarnings(as.numeric(x) <= 5)
            out[idx] <- 'yellow'
            idx <- suppressWarnings(as.numeric(x) <= 3)
            out[idx] <- 'orange'
            idx <- suppressWarnings(as.numeric(x) <= 1)
            out[idx] <- 'pink'
            idx <- suppressWarnings(as.numeric(x) <= 0)
            out[idx] <- 'red'
            idx <- suppressWarnings(is.na(as.numeric(x)))
            out[idx] <- 'red'
            out
        }

        my_color_fun_total <- function(x) {
            out <- rep("white", length(x))
            idx <- suppressWarnings(as.numeric(x) < 33)
            out[idx] <- 'pink'
            idx <- suppressWarnings(as.numeric(x) < 30)
            out[idx] <- 'red'
            out
        }

        if (isTRUE(mark_low_vals)) {
            if( isTRUE(BIN) && isFALSE(calculate == "fractions") ){
                flextable_out <- bg(flextable_out, j=c(categories_vec), bg = my_color_fun, part="body")       
                flextable_out <- bg(flextable_out, j=c("Total"), bg = my_color_fun_total, part="body")   
            } else if(isTRUE(BIN) && isTRUE(calculate == "fractions")) {
                flextable_out <- bg(flextable_out, j=c(categories_vec), bg = my_color_fun_fractions, part="body")       
            } else {
                # FIX
                flextable_out <- bg(flextable_out, j = c("Cat_1", "Cat_2", "Cat_3", "Cat_4", "Total"), bg = my_color_fun, part="body")
            }
        }
        
        flextable_out <- fontsize(flextable_out, i = NULL, j = NULL, size = 8, part = "header")
        flextable_out <- fontsize(flextable_out, i = NULL, j = NULL, size = 8, part = "body")


        if(isTRUE(BIN)){
            flextable_out <- set_caption(flextable_out, 
                caption = "Sampling fraction according to the 2018 Agricultural Census by category", 
                style = "Table Caption", 
                autonum = run_autonum(seq_id = "tab", bkm = "tab1")
            )
        } else {
            flextable_out <- set_caption(flextable_out, 
                caption = "LMM observations", 
                style = "Table Caption", 
                autonum = run_autonum(seq_id = "tab", bkm = "tab1")
            )  
        }

        list_of_cats <- list()
        list_of_cats[[1]] <- flextable_out

    } else { ##########################################################################################################################################

        print("Input data has more than 7 columns; Assuming that SO strata are used.")
        # Check for categories
        name_vec <- names(table_in)
        name_vec <- data.frame(name_vec)

        name_vec <- name_vec %>%
            tidyr::extract(name_vec, c('lower', 'upper', 'threshold_categories'), '(\\d+),(\\d+)[\\]\\)]\\s*(\\w*)', convert = TRUE)
       
        # Adding missing columns that do not show up at all for some categories
        if (!is.na(name_vec$threshold_categories[1])) {
            present_combinations <- str_split(names(table_in), pattern=" ", n = 2, simplify=TRUE)
            present_strata_combinations <- unique(present_combinations[,1]) # 13

            present_categories <- unique(present_combinations[,2]) # 5

            all_combinations <- rbind(rep(present_strata_combinations, each = length(present_categories)))
            all_combinations <- rbind(all_combinations, present_categories)
            all_combinations <- paste0(all_combinations[1,], " ", all_combinations[2,])
            
            if (    length(names(table_in)) != length(all_combinations) ) {
                # Add missing categories
                add_name_vector <- all_combinations[all_combinations %nin% names(table_in)]

                for (i in seq_along(add_name_vector)) {
                    if (add_name_vector[i] %nin% names(table_in)) {
                        table_in[ , add_name_vector[i]] <- 0
                        if (exists("bin_replacements_index_table")) {
                            bin_replacements_index_table <- cbind(bin_replacements_index_table,FALSE)
                            colnames(bin_replacements_index_table)[ncol(bin_replacements_index_table)] <- add_name_vector[i]
                        }
                    }
                }
            }
            print("Missing table categories added")
        }

        lowers <- unique(name_vec$lower)
        uppers <- unique(name_vec$upper)
        categories_vec <- unique(name_vec$threshold_categories)

        lowers <- sort(lowers)
        uppers <- sort(uppers)
        categories_vec <- sort(categories_vec)

        strata_df <- tibble::enframe(strata_in, name = "ENG_name", value = "strata_column")

        set_flextable_defaults(
            font.size = 10, font.family = "Helvetica",
            text.align = "center",
            font.color = "#333333",
            border.color = "gray"
            # padding.top = 3, padding.bottom = 3,
            # padding.left = 4, padding.right = 4
            )

        names(table_in)[duplicated(names(table_in))[]]

    # Make thresholds
        thresholds_maker <- function(table_in, by_cat=FALSE) {

            if (!is.na(categories_vec[1])) {
                out <- rbind(rep(lowers, each = length(categories_vec)), rep(uppers, each = length(categories_vec)))
                colnames(out) <- rep(c(categories_vec), length.out = ncol(out))
                row.names(out) <- c('lower threshold x1000', 'upper threshold x1000')
                # https://stackoverflow.com/questions/62960127/convert-column-names-into-first-row-of-data-frame-in-r
                if (by_cat) {
                    firstrow <- colnames(out)
                    out <- setNames(rbind(firstrow, out), names(out))
                    rownames(out)[1] <- "region"
                    out <- data.frame(out)
                    # out[out=="1000000"]<-"Infinity"
                    names(out) <- apply(out, 2, paste0, collapse="_")
                    out <- data.table(out, keep.rownames = TRUE)[]
                } else {
                    lastrow <- colnames(out)
                    out <- setNames(rbind(out,lastrow), names(out))
                    rownames(out)[3] <- "region"
                    out <- data.frame(out)
                    # out[out=="1000000"]<-"Infinity"
                    names(out) <- apply(out, 2, paste0, collapse="_")
                    out <- data.table(out, keep.rownames = TRUE)[]
                }
            } else {
                out <- rbind(lowers, uppers)
                row.names(out) <- c('lower threshold x1000', 'upper threshold x1000')
                firstrow <- colnames(out)
                # https://stackoverflow.com/questions/62960127/convert-column-names-into-first-row-of-data-frame-in-r
                out <- setNames(rbind(firstrow, out), names(out))
                out <- data.frame(out)
                names(out) <- apply(out, 2, paste0, collapse="_")
                out <- data.table(out, keep.rownames = TRUE)[]
                names_out <- vector()
                out <- data.frame(out)
                for (i in seq_along(out[1,])) {
                    names_out <- append(names_out, paste0('str_', paste0(out[,i])[1], '_', paste0(out[,i])[2],collapse=""))
                }
                out <- data.frame(out)
                names(out) <- names_out
            }
            
            out
        }

        thresholds_cat <- thresholds_maker(table_in, by_cat=TRUE)
        thresholds_strata <- thresholds_maker(table_in)

        # COLUM ORDER
        # https://stackoverflow.com/questions/72141429/converting-column-names-so-they-can-be-put-in-an-numerical-order/72141622?noredirect=1#comment127466664_72141622
        replacement_order <- order_cols(table_in)
        table_in <- table_in[replacement_order]

        names(table_in)[duplicated(names(table_in))[]]

        # First by category
        if (!is.na(categories_vec[1])) {
            category_order <- stringr::word(names(table_in), -1)
            category_order <- order(category_order)
            table_in <- table_in[category_order]
        }

        category_order_sorted_by_strata <- stringr::word(names(thresholds_strata), 1)

        ############################################################################

        # total observations/population per category
        if (is.na(categories_vec[1]) && calculate=="fractions") {
            # Put table_lbt_input NSO rows right order
            table_in$Total <- rowSums(table_in, na.rm = TRUE)
        } else if (is.na(categories_vec[1]) && calculate!="variance") {
            table_in$Total <- row_sums_stored_from_only_bin
        }
        # Convert to data.table (data.table does not support rownames)
        table_in <- data.table(table_in, keep.rownames = TRUE)[] # This table already has the right numbers for BIN and LBT

        # REPLACEMENT TABLE WITH LBT
        if (!is.null(LBT_data) && calculate == "none") {
            print("Replacing BIN cells with zero observations with data from LBT")
            # # Put table_in strata columns right order
            # replacement_order <- order_cols(table_in)
            # table_in <- table_in[,replacement_order]

            # # Put table_in NSO rows right order
            # index_order <- match(strat_order, row.names(table_in))
            # table_in <- table_in[index_order,]

            replacement_table_in <- table_in

            # Puts the rows of the replacement table in the correct order
            replacement_table_in <- replacement_table_in %>%
                slice(match(strat_order, rn))
        
            # BIN REPLACEMENTS WAS STORED IN COMBINATION TABLE
            bin_replacements_index_table <- bin_replacements_index_table[match(strat_order, rownames(bin_replacements_index_table)),]

            col_order <- names(replacement_table_in)[2:(ncol(replacement_table_in)-1)]

            bin_replacements_index_table <- bin_replacements_index_table[, col_order]

            # 2:(ncol(bin_replacements_index_table)-1)] simply skips the columns rn and Total
            replacement_table_in[,2:(ncol(bin_replacements_index_table)-1)] <- replacement_table_in[,2:(ncol(bin_replacements_index_table)-1)]    

            # Replace the zero BIN cells with LBT
            # if (length(replacement_table_in)<16) {
            if (isTRUE(strata=="calculate")) {
                # Adapt negative values table to match replacement_table_in
                out_BIN_II <- as.data.frame.matrix(out_BIN_II*-1)
                for (i in 1:(ncol(replacement_table_in)-2)) {
                    replacement_table_in[[i+1]][as.vector(unlist(bin_replacements_index_table[,i]))] = paste0(replacement_table_in[[i+1]][as.vector(unlist(bin_replacements_index_table[,i]))], " in LBT - ", 
                    paste0("(", out_BIN_II[[i]][as.vector(unlist(bin_replacements_index_table[,i]))]," needed)")) # [i] instead of [i+1] because no "rn" column
                }
            } else {
                for (i in 1:(ncol(replacement_table_in)-2)) {
                    replacement_table_in[[i+1]][as.vector(unlist(bin_replacements_index_table[,i]))] = 
                    paste0( replacement_table_in[[i+1]][as.vector(unlist(bin_replacements_index_table[,i]))], " in LBT"  )
                }
            }
            # }

            replacement_table_in <- merge(replacement_table_in, strata_df, all.x=TRUE, by.x="rn", by.y="ENG_name", sort=TRUE)

            replacement_table_in$Total <- as.character(replacement_table_in$Total)

            table_in <- replacement_table_in

            # # Put table_in strata columns right order
            # replacement_order <- order_cols(table_in)
            # table_in <- table_in[,replacement_order]

            # # Put table_in NSO rows right order
            # index_order <- match(strat_order, row.names(table_in))
            # table_in <- table_in[index_order,]

        } else if (calculate != "none") {
            table_in <- merge(table_in, strata_df, all.x=TRUE, by.x="rn", by.y="ENG_name", sort=TRUE)
        } 

    # Add lists of frequencies ###############################################

        if (calculate=="variance"){
            table_in$Total <- "Not applicable"
        }

        if (!is.na(categories_vec[1])) {  
            table_in <- table_in %>% mutate_if(is.numeric,as.character)       
            print("Making a separate table for each of the categories")
            out_table_in_X <<- table_in
            # Split table categories
            frequency_table <- out_table_in_X %>%
                pivot_longer(cols = -c(rn, strata_column, Total),
                names_to = c("lower", "upper", "direction"),
                names_pattern = "\\[(\\d+),(\\d+)[\\)\\]]\\s+(\\S+$)",
                    values_drop_na = TRUE) %>% 
                type.convert(as.is = TRUE) 

            # Remove total of all categories together because it is not useful.
            frequency_table$Total <- NULL
            # Only keep rows of which the strata_column match
            frequency_table <- frequency_table %>%
                rowwise() %>% 
                filter(is.consec(as.numeric(lower), as.numeric(upper), strata_column)) %>%
                ungroup()

            frequency_table$value <- as.character(frequency_table$value)
            frequency_table$upper <- as.character(frequency_table$upper)


            frequency_table <- frequency_table %>%
                group_by(rn, direction ) %>% 
                summarise(value = as.character(sum(as.numeric(value), na.rm=TRUE)), .groups = 'drop_last', upper="1000001", strata_column=strata_column) %>%                  # get sum of sizes
                bind_rows(frequency_table, .)   

            # Remove the duplicate rows     
            frequency_table <- unique( frequency_table )

            # Convert upper back to numeric for sorting                     
            frequency_table$upper <- as.numeric(frequency_table$upper)
            frequency_table <- frequency_table %>%
                arrange(rn, direction, upper)

            # THIS SUMMARIZES IT
            frequency_table <- frequency_table %>%
                group_by(rn, strata_column) %>% 
                summarise(freq = list(value), .groups = 'drop')  

        } else {
            out_table_in_Y <<- table_in
            out_table_in <<- table_in
            frequency_table <- out_table_in  %>%
                pivot_longer(-c(rn, strata_column, Total)) %>%
                tidyr::extract(name, c('lower', 'upper', 'direction'), '(\\d+),(\\d+)[\\]\\)]\\s*(\\w*)', convert = TRUE) %>%
                select(-where(is_all_na)) %>%
                rowwise() %>% 
                filter(is.consec(lower, upper, strata_column)) %>%
                ungroup() %>% 
                group_by(rn, strata_column, Total) %>% 
                summarise(freq = list(value), .groups = 'drop')
                
            for (i in seq_along(frequency_table$freq)){
                frequency_table$freq[[i]] <- append(frequency_table$freq[[i]],frequency_table$Total[[i]])
            }

        }

        frequency_table <- frequency_table %>%
                slice(match(strat_order, rn))

        ##########################################################################################
        # Calculate standard colspan
        oldw <- getOption("warn")
        options(warn = -1)

        x <- as.numeric(unique(unlist(thresholds_strata)))
        options(warn = oldw)
        unique_num_values <- x[!is.na(x)]
        unique_num_values <- sort(unique_num_values)
        total_colspan <- c(unique_num_values, "Total")
        total_colspan <- gsub(1000000, "Infinity", total_colspan, fixed = T)

        frequency_table$strata_column <- lapply(frequency_table$strata_column, \(x){
            # x <- append(x, c("Total"))
            x <- gsub(1000000, "Infinity", x, fixed = T)
            x <- append(x, c("Total"))
            x
        })

        # Index differences
        l <- lapply(frequency_table$strata_column, \(y) sapply(y, \(x) which(total_colspan == x) - which(y == x)))
        frequency_table$l <- l
        frequency_table$l <- lapply(frequency_table$l, \(x) diff(x) + 1)

        if (!is.na(categories_vec[1])){

            if (length(frequency_table$freq[[1]]) > length(total_colspan)) {
                frequency_table$l <- lapply(frequency_table$l, \(x){
                    x <- append( x, rep( x, length(categories_vec) - 1 )  )
                    x
                })
            }
            print("For the following categories:")
            print(categories_vec)
        }
                    
        html_table_in <- frequency_table[c("rn", "freq","l")] %>% 
            unnest_longer(freq) %>% 
            mutate(colspan = unlist(frequency_table$l),
                width = colspan * 50)

        colspan <- html_table_in %>%
            group_by(rn) %>%
            summarise(colspan = list(colspan))

        frequency_table <- merge(frequency_table, colspan, by.x="rn", by.y="rn")
        
        if (!is.na(categories_vec[1])) {
            # SET TABLE ORDER
            frequency_table <- frequency_table %>%
                slice(order(factor(rn, strat_order)))
            print("Set table order to order used in FADN 2018")
        } else {
            # SET TABLE ORDER
            frequency_table <- frequency_table %>%
                slice(match(strat_order, rn))
            print("Set table order to order used in FADN 2018")                
        }

        frequency_table_out <- frequency_table[,c("rn", "freq", "colspan")]

        moveMeDataTable <-function(data, tomove, where = "last", ba = NULL) {
            data <- data.table(data)
            temp <- setdiff(names(data), tomove)
            x <- switch(
            where,
            first = setcolorder(data,c(tomove, temp)),
            last = setcolorder(data,c(temp, tomove)),
            before = {
                if (is.null(ba)) stop("must specify ba column")
                if (length(ba) > 1) stop("ba must be a single character string")
                order = append(temp, values = tomove, after = (match(ba, temp)-1))
                setcolorder(data,order)

            },
            after = {
                if (is.null(ba)) stop("must specify ba column")
                if (length(ba) > 1) stop("ba must be a single character string")
                order = append(temp, values = tomove, after = (match(ba, temp)))
                setcolorder(data,order)
            })
            x
        }

        if (!is.na(categories_vec[1])){
            # Adapt
            all_categories <- gsub("_"," ", names(thresholds_strata))
            all_categories_1 <- stringr::word(all_categories, -1)
            all_category_order <- order(all_categories_1)
            thresholds_cat <- thresholds_cat[,all_category_order, with=FALSE]
            thresholds_cat <- data.table(thresholds_cat)
            thresholds_cat <- moveMeDataTable(thresholds_cat, "rn", "first")
            sum_categories <- vector()
            for (i in seq_along(categories_vec)) {
                sum_categories[i] <- paste0(categories_vec[i], "_total")
            }
            thresholds_cat[,sum_categories] <- NA
            thresholds_cat <- data.table(thresholds_cat)
            thresholds_cat <- as.data.frame(lapply(thresholds_cat, as.character))
            sum_categories_clean <- gsub("_", " ", sum_categories)
            thresholds_cat[3,sum_categories] <- lapply(sum_categories_clean, paste)
            thresholds_cat <- data.table(thresholds_cat)
            for (i in seq_along(categories_vec)) {
                thresholds_cat <- moveMeDataTable(data = thresholds_cat, tomove = sum_categories[i], where = "after", ba = paste0( categories_vec[i], "_3000_1000000"))
            }
        } else {
            thresholds_cat[,"Total"] <- "Total"
        }

        out <- map(1:nrow(frequency_table), function(index){
            out <- data.frame("freq" = frequency_table$freq[[index]], 
                                "span" = frequency_table$colspan[[index]]) %>% 
                tidyr::uncount(span, .id = 'span') %>% 
                mutate(freq = ifelse(span>1, NA, freq)) %>% 
                t %>% 
                as.data.frame() %>% 
                mutate(rn = frequency_table$rn[[index]],
                    across(everything(), ~as.character(.))) %>% 
                select(rn, everything()) %>%
                # Correct names need to be selected
                set_names(nm = names(thresholds_cat)) %>% 
                slice(1)
            return(out)
        }) 

        combined <- thresholds_cat %>% 
            mutate(across(everything(),  ~as.character(.))) %>% 
            bind_rows(out)

        if (!is.na(categories_vec[1])) {
            combined <- combined[-c(1:3),]
        } else {
            combined <- combined[-c(1:2),]
        }
        
        threshold_list <- list()
        if (!is.na(categories_vec[1])) {
            for (i in seq_along(categories_vec)) {
                first_col <- thresholds_cat[,1]
                cols <- thresholds_cat %>% select(starts_with(categories_vec[i]))
                threshold_list[[i]] <- cbind(first_col, cols)
            } 
        } else {
            threshold_list[[1]] <- thresholds_cat
        }

        spans <- map(1:length(frequency_table$colspan), function(index){
            spans <- frequency_table$colspan[[index]] %>%  
            as_tibble() %>% 
            mutate(idx = row_number()) %>% 
            tidyr::uncount(value, .remove = F) %>% 
            group_by(idx) %>%
            mutate(pos = 1:n(),
                    value = ifelse(pos != 1, 0, value)) %>% 
            ungroup() %>% 
            select(value) %>% 
            t
            return(append(1, spans))
        })    

        list_of_cats <- list()
        if (!is.na(categories_vec[1])) {
            for (i in seq_along(categories_vec)) {
                # assign(paste0(categories_vec[i]), combined %>% select(starts_with(categories_vec[i])))
                print(categories_vec[i])
                first_col <- combined[,1]
                other_cols <- combined %>% select(starts_with(categories_vec[i]))
                out_other_cols1 <<- other_cols

                # Too late for totals, since LBT data is already in there, but can be replaced!
                # if (cases when totals should be taken from BIN) {
                #   out_list_of_row_sums_stored_from_only_bin <<- list_of_row_sums_stored_from_only_bin
                #   other_cols[,ncol(other_cols)] <- list_of_row_sums_stored_from_only_bin[[i]]
                # }
                
                out_other_cols2 <<- other_cols
                list_of_cats[[i]] <- cbind(first_col, other_cols)
                names(list_of_cats)[i] <- paste0(categories_vec[i], "-gebieden")

                list_of_cats[[i]] <- flextable(list_of_cats[[i]]) %>% theme_box()    
                for (i in seq_along(spans)){
                    spans[[i]] <- spans[[i]][1:11]
                }
                number_of_columns <- (ncol(combined)-1)/length(categories_vec)+1
            }
        } else {
            list_of_cats[[1]] <- flextable(combined) %>% theme_box()   
            number_of_columns <- ncol(combined)
        }



        # FIX IS IN WRONG SPOT
        for (i in 1:length(threshold_list)){
            threshold_list[[i]][[length(threshold_list[[1]])-1]] <- gsub("1000000", "Inf", threshold_list[[i]][[length(threshold_list[[1]])-1]] )
        }

        for (i in seq_along(list_of_cats)) {

            # FIX INDEX
            flextable_out <- list_of_cats[[i]]

            flextable_out$body$spans$rows[1:nrow(flextable_out$body$spans$rows),] <- matrix(unlist(spans), ncol = number_of_columns, byrow = TRUE)
            
            # Label var names
            a <- names(threshold_list[[i]])
            b <- as.vector(unlist(threshold_list[[i]][1,]))
            values <- as.list(setNames(b, a))
            flextable_out <- set_header_labels(flextable_out, values=values)
            for (j in 2:nrow(threshold_list[[1]])) {
                flextable_out <- add_header(flextable_out, top = FALSE, values = threshold_list[[i]][j,])
            }
            
            flextable_out <- align(flextable_out, align = "center", part = "all")

            FitFlextableToPage <- function(ft, pgwidth = 6) {

                ft_out <- ft %>% autofit()

                ft_out <- width(ft_out, width = dim(ft_out)$widths*pgwidth /(flextable_dim(ft_out)$widths))
                return(ft_out)
            }


            # set_table_properties(flextable_out, layout = "autofit")
            flextable_out <- FitFlextableToPage(flextable_out, pgwidth = 6)
            
            # GREY THRESHOLDS
            flextable_out <- bg(flextable_out, bg="#bfbfbf", part = "header")
            flextable_out <- color(flextable_out, color="white", part = "header")
            std_border = fp_border(color="white", width = 1)

            # BOLD FIRST COLUMN            
            flextable_out <- bold(flextable_out, j = c(1), bold = TRUE, part = "body")
            
            colwidths <- c(rep(1,length(threshold_list[[1]])))
            values <- c("Type of farm", rep("",length(threshold_list[[1]])-1))
            flextable_out <- add_header_row(flextable_out, top = FALSE, values = values, colwidths = colwidths)
            flextable_out <- italic(flextable_out, i= nrow(thresholds_cat)+1, italic = TRUE, part = "header")
            flextable_out <- bold(flextable_out, i= nrow(thresholds_cat)+1, bold=FALSE, part = "header")
            flextable_out <- color(flextable_out, i= nrow(thresholds_cat)+1, color="black", part = "header")
            flextable_out <- bg(flextable_out, i= nrow(thresholds_cat)+1, bg="white", part = "header")

            # WHITE BORDERS
            flextable_out <- border_inner(flextable_out, border = std_border, part = "header")

            flextable_out <- fontsize(flextable_out, i = NULL, j = NULL, size = 8, part = "header")
            flextable_out <- fontsize(flextable_out, i = NULL, j = NULL, size = 8, part = "body")
           
            if (isTRUE(mark_low_vals)) {
                    flextable_out <- bg(flextable_out, bg = my_color_fun)
                    flextable_out <- bg(flextable_out, j=c(11), bg = "white")
                    # flextable_out <- bg(flextable_out, j=c(11), bg = my_color_total_fun)
                    flextable_out <- bg(flextable_out, j=c(1), bg = "white")
                    # Sum column under 30 is red
                    flextable_out <- bg(flextable_out, j=c(2), bg = "white")
            } 

            if (!is.na(categories_vec[1])) {
                flextable_out <- set_caption(flextable_out, 
                    caption = "Sampling fraction according to the 2018 Agricultural Census by stratum and category", 
                    style = "Table Caption", 
                    autonum = run_autonum(seq_id = "tab", bkm = "tab1"))
            } else {
                flextable_out <- set_caption(flextable_out, 
                    caption = "Sampling fraction according to the 2018 Agricultural Census by stratum", 
                    style = "Table Caption", 
                    autonum = run_autonum(seq_id = "tab", bkm = "tab1"))
            }

            list_of_cats[[i]] <- flextable_out
            
        }    

    }

    return(list_of_cats)

}
Ellesaere/tkis.package documentation built on Sept. 14, 2022, 10:02 a.m.