library(knitr)
opts_knit$set(root.dir="../../..") # file paths are relative to the root of the project directory
library(tradeflows)
library(dplyr)
library(RMySQL)

Monthly data

# Read the raw monthly flows for product code 440799 and list the distinct
# values found in a few columns of the result.
dtf <- readdbproduct(440799, tableread = "raw_flow_monthly")
unique(dtf$flag)
unique(dtf$period)
unique(dtf$unit)
unique(dtf$reporteriso)
unique(dtf$partneriso)
unique(dtf$quantity)
# Run the database cleaning for the same product, reading from the raw
# monthly table and writing to the validated monthly table.
cleandbproduct(440799, tableread =  "raw_flow_monthly", 
               tablewrite = "validated_flow_monthly")

# The call above originally failed with:
#   Error in addpartnerflow(.) :
#   Remove duplicated entries before adding partner flows
# The error was corrected by inserting the removeduplicatedflows()
# function call directly under cleandbproduct().

Develop a clean monthly function

#' Clean monthly trade flows with reference tables from yearly data
#'
#' The yearly data frame is first passed through \code{clean()} with
#' \code{outputalltables = TRUE}. The \code{price}, \code{conversionfactor}
#' and \code{choice} elements of that result are then used while
#' processing the monthly data frame.
#'
#' @param dtfmonthly data frame of monthly trade flows
#' @param dtfyearly data frame of yearly trade flows, given to \code{clean()}
#' @param geoaggregation forwarded to the \code{geoaggregation} argument
#'   of \code{clean()}
#' @param replacebypartnerquantity logical, when TRUE the function
#'   \code{replacebypartnerquantity()} is applied with the yearly
#'   \code{choice} table
#' @param shaveprice logical, when TRUE the price is recomputed as
#'   \code{tradevalue / quantity} and \code{shaveprice()} is applied
#' @param outputalltables logical, when TRUE return a list holding the
#'   monthly data frame and the yearly reference tables instead of the
#'   monthly data frame alone
#' @return the processed monthly data frame, or, when
#'   \code{outputalltables} is TRUE, a list with elements \code{dtf},
#'   \code{price}, \code{conversionfactor} and \code{choice}
cleanmonthly <- function(dtfmonthly,
                         dtfyearly,
                         geoaggregation = "region",
                         replacebypartnerquantity = TRUE,
                         shaveprice = TRUE,
                         outputalltables = FALSE) {
    # --- Yearly reference tables: price, conversionfactor, choice ---
    yearly <- clean(dtfyearly,
                    geoaggregation = geoaggregation,
                    outputalltables = TRUE)

    # --- Monthly data preparation ---
    # Keep only the columns whose name is an efi column name flagged
    # for raw_flow_monthly in column_names (config/column_names.csv).
    efinames <- column_names$efi[column_names[, "raw_flow_monthly"]]
    columnsread <- names(dtfmonthly)[names(dtfmonthly) %in% efinames]
    # addconversionfactorandprice is intentionally left out of this chain:
    # no quantity means no conversion factor and price.
    dtfmonthly <- dtfmonthly %>%
        select_(.dots = columnsread) %>%
        removeduplicatedflows() %>%
        addregion()
    nrowinitial <- nrow(dtfmonthly)

    # --- Quantity estimation ---
    # Turn "exports" and "imports" into "export" and "import"
    # (every occurrence of "ports" in the flow column becomes "port").
    dtfmonthly$flow <- gsub("ports", "port", dtfmonthly$flow)
    dtfmonthly <- dtfmonthly %>%
        estimatequantity(yearly$price, yearly$conversionfactor) %>%
        addpartnerflow()

    # The two logical arguments below share their name with the function
    # they switch on. The calls still reach the functions because R skips
    # non-function bindings when it looks up the target of a call.
    if (replacebypartnerquantity) {
        dtfmonthly <- replacebypartnerquantity(dtfmonthly, yearly$choice)
    }
    if (shaveprice) {
        # Recalculate the price before shaving it; per the original note,
        # shaving is based on yearly upper and lower prices added above.
        dtfmonthly <- dtfmonthly %>%
            mutate(price = tradevalue / quantity) %>%
            shaveprice()
    }

    # The number of rows should not have changed since the preparation
    # step. It might change if there are duplicated flows.
    stopifnot(nrow(dtfmonthly) == nrowinitial)

    # --- Output: data frame alone, or list with the yearly tables ---
    if (!outputalltables) {
        return(dtfmonthly)
    }
    list(dtf = dtfmonthly,
         price = yearly$price,
         conversionfactor = yearly$conversionfactor,
         choice = yearly$choice)
}

# Read the raw monthly and yearly flows for product code 440799 and run
# cleanmonthly() on them, asking for the list output with all tables.
dtfm <- readdbproduct(440799, tableread = "raw_flow_monthly")
dtfy <- readdbproduct(440799, tableread = "raw_flow_yearly")
# The argument name is spelled out in full: the former `outputalltable`
# only reached `outputalltables` through partial argument matching.
bli <- cleanmonthly(dtfm, dtfy, shaveprice = TRUE, outputalltables = TRUE)
checkdbcolumns(c("raw_flow_monthly", "validated_flow_monthly"))
# Same tables again, stored under the parameter names of cleanmonthly()
# (presumably to step through the function body by hand - confirm).
dtfmonthly <- readdbproduct(440799, tableread = "raw_flow_monthly")
dtfyearly <- readdbproduct(440799, tableread = "raw_flow_yearly")
duplicates <- findduplicatedflows(dtfm)
# Keep only columns useful for R,
# those efi column names that are in config/column_names.csv
columnsread <- names(dtfm)[names(dtfm) %in%
                              column_names$efi[column_names[,"raw_flow_monthly"]]]
dtfm <- dtfm %>% 
    select_(.dots= columnsread) 


### clean() function and issues
# Apply clean() with price shaving to the data frame `dtf`, then list the
# distinct quantity and flag values of the result.
dtf <- clean(dtf,
             shaveprice = TRUE)
unique(dtf$quantity)
unique(dtf$flag)


# Remove duplicated flow Albania to Greece
# Isolate the rows reported by Albania with partner Greece for period 201202.
alb <- dtf %>% filter(reporter=="Albania", partner=="Greece", 
                      period==201202)
alb2 <- removeduplicatedflows(alb)
# select(-aika) is what I really want to do
# NOTE(review): `usefullcolumns` is not defined anywhere in this file;
# confirm where it comes from before running this line.
alb2 <- alb %>% select_(.dots=usefullcolumns) %>% unique

Yearly data

# Check the database columns with the default arguments, then run the
# database cleaning for product code 440799 from the raw yearly table
# into the validated yearly table.
checkdbcolumns()
cleandbproduct(
    440799,
    tableread = "raw_flow_yearly",
    tablewrite = "validated_flow_yearly"
)


paul4forest/tradeflows documentation built on Oct. 8, 2019, 10:35 a.m.