library(efsagis)
library(efsa2016.00601)
library(dplyr)
library(pander)
library(sp)
library(tidyr)
library(ggplot2)
library(ggalt) # devtools::install_github("hrbrmstr/ggalt") 

knitr::opts_chunk$set(comment=NA, fig.width=14, fig.height=14/2.4,dpi=221,echo=F,cache=T,fig.path="figures/",
                      autodep=T)
usePng <- F

exportCsv <- function(df,filename) {
    write.csv(df,paste0("./output/",filename,".csv"))
}


renderDF <- function(df,caption,digits=panderOptions("digits")) {
    exportCsv(df,gsub(" ","_",caption))
    pander::pander(df %>% as.data.frame ,
                   caption=caption,
                   digits)


}


nextPlot <- function(name) {

    if (usePng & !names(dev.cur())=="null device") {
        dev.off()
    }
    if (usePng) {
        png(paste0("./output/mapProposals/",name,".png"),width = 1366,height=768)
    } else {
        ##readline(prompt=paste0(name,": Press [enter] to continue"))

    }
    invisible()

}
if (usePng) {
    system("rm output/mapProposals/*.png")

}
data(NUTS_01M_2013,package="efsagis")
data(countries_01M_2013,package="efsagis")
data(euCitrusSurface,package = "efsa2016.00601")

world.eu <- worldEu()
EU_NUTS.0 <- euNuts0()
prepare_citrus_layer()
exportCsv(euCitrusSurface,"euCitrusSurfaceData")

EU citrus production surface area

data

The following table is showing the citrus production surface in Europe. The data was collected from the national statistical offices of the individual countries. It most cases the data was available on NUTS-3 level, in some cases it was extrapolated from NUTS2. These cases are listed in table xxx. The table shows as well the latest year of which data was available.

data <-  latestData() 
mostRecentData <- data %>%
    dplyr::select(country,year,NUTS.Code,NUTS3.name,ha,citrus_density) %>%
    dplyr::arrange(NUTS.Code) 
renderDF(mostRecentData ,caption="Citrus product surface area per NUTS3 region")

The following map shows the citrus production from above table with colorcoding according to the area of citrus production in each NUTS3 region.

in ha

nextPlot("surface_ha")
## surface
base_layer() +
    worldCountries_layer(world.eu) +
    citrusSurface_layer(column="ha",alpha=1) +
    nuts0_layer()+
    nuts3_layer() +

    tmap::tm_format_Europe("EU citrus production")

The next table aggregates the citrus area by country.

countryHa <- data %>%
    dplyr::group_by(country) %>%
    dplyr::summarise(sum(ha))

renderDF(countryHa,caption="Citrus product surface area per country")

This table shows for which NUTS3 regions data transformations where applied, in order to have all data in the same NUTS3 classification.

comments <- data %>%
    dplyr::mutate(comment=ifelse(is.na(comment),"",comment)) %>%
    dplyr::group_by(country,year,comment) %>%
    dplyr::select(country,comment) %>%
    dplyr::slice(1)
renderDF(comments,caption="Notes on data transformations")

as density ha / km^2

The following table shows the EU citrus production surface as density, instead of absolute surface.

nextPlot("surface_density")
base_layer() +
    worldCountries_layer(world.eu) +
    citrusSurface_layer(column="citrus_density",alpha=1) +
    nuts3_layer() +
    nuts0_layer()+
    tmap::tm_format_Europe() 

Citrus surface + Magarey 2015 stations

In the following map the Magarey 2015 EU locations are shown and two columns of table 2 of Magarey 2015 are shown, which characterize the ascospore infection risk.

with ascospores infection score

nextPlot("surface_MagAsco")
base_layer() +
    worldCountries_layer(world.eu) +
    citrusSurface_layer(column="citrus_density") +
    nuts0_layer()+
    nuts3_layer() +
    magarey_layer(column_text = "asco_days_average",
                  title_text = "Ascospores",
                  column_size = "asco_suit_years",
                  title_size = "Magarey et al. 2015\nAscospores \n% suitable years",
                  style = "fixed",
                  alpha = 0.5,
                  scale=1.2
                  ) +
  tmap::tm_legend(title.size=1.0,
                  text.size=0.6) +
    tmap::tm_format_Europe_wide(inner.margins = c(0, 0.18, 0, 0),
                                title="Magarey et al. 2015 ascospores\ninfection scores") 

with pycnidiospres infection scores

This map shows the pycnidiospores infection score from Magarey 2015 table 2.

nextPlot("surface_MagPycnidio") 
base_layer() +
    worldCountries_layer(world.eu) +
    citrusSurface_layer(alpha=1,column="citrus_density") +
    nuts3_layer() +
    nuts0_layer()+
    magarey_layer(column_text = "pyc_average",
                  title_text = "Pycnidiospores",
                  column_size = "pyc_suit_years",
                  title_size = "Magarey et al. 2015\nPycnidiospores \n% suitable years",
                  style="fixed",
                  alpha=0.5,
                  scale=0.6) +
  tmap::tm_legend(title.size=1.0,
                  text.size=0.6) +
    tmap::tm_format_Europe_wide(inner.margins = c(0, 0.18, 0, 0),
                                title="Magarey et al. 2015 pycnidiospores\ninfection scores") 

Citrus surface + EFSA 2014

surface as polygon

nextPlot("surface_efsa2014")


base_layer() +
    nuts0_layer()+
    worldCountries_layer(world.eu) +
    nuts3_layer() +
    infection_layer("extdata/spores2/Asco_3_15_Model_AVG.xlsx","Jun",
                    "Proportion of infection events (%)",
                    .5) +

    citrusSurface_outline_layer() +
    tmap::tm_format_Europe()

EFSA 2014 + Magarey 2015

base_layer() +
    nuts0_layer()+
    worldCountries_layer(world.eu) +
    nuts3_layer() +
    infection_layer("extdata/spores2/Asco_3_15_Model_AVG.xlsx","Jun",
                    "EFSA2014 proportion of infection events (%)",
                    .5) +
        magarey_layer(column_text = "asco_days_average",
                  title_text = "Ascospores",
                  column_size = "asco_suit_years",
                  title_size = "Magarey et al. 2015\nAscospores \n% suitable years",
                  style = "fixed",
                  alpha = 0.5,
                  scale=1.2
                  ) +
    tmap::tm_format_Europe()

rain/non rain comparison

library(SparkR)
                                        ##sc <- SparkR::sparkR.init(master="local[2]",sparkPackages = "com.databricks:spark-csv_2.10:1.3.0")
sc <- SparkR::sparkR.init(master="local[4]")
sqlContext <- SparkR::sparkRSQL.init(sc)
sum_by_rain_gridno <- sum_by_rain_gridno(sqlContext)
sum_by_rain <- sum_by_rain(sqlContext)
SparkR::sparkR.stop()
detach("package:SparkR")

In order to assess the impact of inclusion or exclusion of the condition of rain on the model, the following table compares the total number of infection events from EFSA 2014, seperated by presence/non-presence of rain on a given day.

renderDF(sum_by_rain,"EFSA 2104 infection events with and without presence of rain")

The next 2 plots show the avverage number of infection events from EFSA 2014 on days without rain and with rain.

plotInfections(sum_by_rain_gridno,0) 
plotInfections(sum_by_rain_gridno,1) 

In the following we show the difference between inclusion/exclusion of rain for the Magarey 2015 EU locations. The table shows the total number of infection events from the EFSA 2014 model with and without rain for each of the Magarey 2015 EU locations.

The columns are: EFSA infections : total number of infection events from EFSA 2014 EFSA infections filtered : number of infection events, excluding days without rain from EFSA 2014 asco_days_average : Magarey 2015 ascospores infection score asco_suit_years : Magarey 2015 ascospores suitable years pyc_average : Magarey 2015 pycnidiospores infection score pyc_suit_years : Magarey 2015 pycnidiospores suitable years

joined <- joinEfsaGridMagereyPts(sum_by_rain_gridno,"`sum(INFECTION_EVENTS)`") %>%
    dplyr::rename(infection_events=`sum(INFECTION_EVENTS)`)

joined.pres  <- joined %>%
    as.data.frame %>%
    tidyr::spread(ind_rain,infection_events) %>%
    dplyr::rename(efsa_infs_no_rain=`0`) %>%
    dplyr::rename(efsa_infs_rain=`1`)
joined.pres <- joined.pres[,1:7]
joined.pres <- joined.pres %>%
    dplyr::left_join(readMagTable2()) %>%
    dplyr::mutate(efsa_total_infections=efsa_infs_no_rain+efsa_infs_rain) %>%
    dplyr::select(Country,Location,efsa_total_infections,efsa_infs_rain,
                  efsa_infs_no_rain,
                  asco_days_average,asco_suit_years, pyc_average,  pyc_suit_years) %>%

    tbl_df() %>%
    dplyr::arrange( -asco_days_average,-pyc_average)
exportCsv(joined.pres,"rainForMagereyPoints")
colnames(joined.pres) <- c( "Country"   ,
                          "Location"      ,
                          "EFSA infections",
                          "EFSA infections\nfiltered"   ,
                          "efsa_infs_no_rain",
                          "Magarey\nasco_days\naverage",
                          "Magarey\nasco\nsuit_years"  ,
                          "Magarey\npyc\naverage"    ,
                          "Magarey\npyc\nsuit_years")
pander::pander(joined.pres %>% select(-efsa_infs_no_rain),
               caption = "EFSA 2014 infections for Magarey 2015 locations",
               split.cells=T,
               split.tables=Inf)

The data from above shown as a bar plot.

plotMag2015byRain(joined)
colRain <- "#9fb0ff"
colTotal <- "#ff5555"


joined.pres <- joined.pres %>%
  na.omit() %>%
    mutate(diff=`EFSA infections` - `EFSA infections\nfiltered`,
           Location = reorder(Location,diff)) %>%
    arrange(-diff) 

gg <- ggplot(joined.pres)
                                        # doing this vs y axis major grid line
gg <- gg + geom_segment(aes(y=Location, yend=Location, x=-25, xend=2500), color="#b2b2b2", size=0.15)
                                        # dum…dum…dum!bell
gg <- gg + geom_dumbbell(aes(x=`EFSA infections\nfiltered`, xend=`EFSA infections`, y=Location),
                        size=1.5, color="#b2b2b2", point.size.l=3, point.size.r=3,
                        point.colour.l=colRain, point.colour.r=colTotal)
                                        # text below points
gg <- gg + geom_text(data=slice(joined.pres,1),
                    aes(x=`EFSA infections\nfiltered`, y=Location, label="Infections events filtered by days with rain"),
                    color=colRain, size=3, vjust=-2.5, fontface="bold", family="Calibri")
gg <- gg + geom_text(data=slice(joined.pres, 1),
                    aes(x=`EFSA infections`, y=Location, label="All infection events"),
                    color=colTotal, size=3, vjust=-2.5, fontface="bold", family="Calibri")
                                        # text above points
gg <- gg + geom_label(aes(x=`EFSA infections\nfiltered`, y=Location, label=`EFSA infections\nfiltered`),
                     color=colRain, size=2.75, nudge_x = -50, family="Calibri")

gg <- gg + geom_label(color=colTotal, size=2.75, nudge_x = 50, family="Calibri",
                     aes(x=`EFSA infections`, y=Location, label=`EFSA infections`))
                                        # difference column
gg <- gg + geom_rect(aes(xmin=2500, xmax=2600, ymin=-Inf, ymax=Inf), fill="#efefe3")
gg <- gg + geom_text(aes(label=diff, y=Location, x=2550), fontface="bold", size=3, family="Calibri")
gg <- gg + geom_text(data=slice(joined.pres, 1), aes(x=2550, y=Location, label="DIFF"),
                    color="#7a7d7e", size=3.1, vjust=-2, fontface="bold", family="Calibri")
gg <- gg + scale_x_continuous(expand=c(0,0), limits=c(-25, 2600))
gg <- gg + scale_y_discrete(expand=c(0.075,0))
gg <- gg + labs(x=NULL, y=NULL, title="Influence of the constraint of rain on predicted ascospore infection events")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(panel.grid.major=element_blank())
gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold"))
gg

Chapter 2.1

We have used the following data sources for the preparation of the tables and maps.

Citrus production data

The citrus production data was obtained from the offical national statistical offices of the citrus producing countries of Europe. The data was then combined into a single EU dataset, retaining the latest available data for each country.

Infection events from EFSA 2014

The infection events for the rain/non rain comparisons were obtained from the output file of the run of the ascospores infection model for the EFSA 2014 opinion on CBS. It was the same data then used in the EFSA 2014 opinion. The data set contains the number of daily infection events and the information, if any rain was present on that day. We use the data from the model run with D_50 = 3 hours and T_min = 10 °C, the same as in EFSA 2014.

Infection scores from Magarey 2015

The infection scores from Magerey 2015 were downloaded from the web page of the article at Science Direct. http://www.sciencedirect.com/science/article/pii/S0261219415300387

Climate zones from Martinez 2015

The climat zone raster files were obtained by personal communication with the author of Martinez 2015.

if(usePng) {
    dev.off()
    zip("mapProposals.zip","./output/mapProposals/")
}

Estimated influence of data provided by Magarey on the Efsa2014 establishment risks in Europe

It was estimated, which potential effect the incorporation of the Magarey data into the CBS risk map of Europe could have.

In EFSA2014 a CBS establishment risk was calculated for 3036 grid cells. The Magarey paper calculates a risk score for 36 locations in Europe, which can be matched to 33 grid cells, assuming that the risk for a specific location is the same in the complete 25 * 25 km grid cell containing the location. For 2 locations (Thessaloniki,Firenze) no mtaching grid cell from EFSA2014 could be found and 2 Magarey2015 locations are inside the same grid cell.

So the Magarey 2015 data could have an direct impact on maximal 1.15 % of the grid cells for which a CBS establishment risk was calculated in EFSA2014.

To estimate which impact the Magarey2015 data could have, the EFSA2015 and the Magarey risk indices where normalised into a single range (0,1) and then converted to a relative risk to make them comparable.

Normalised infection scores

The following graph shows the normalised risk scores for each Magarey location and the corresponding EFSA2014 grid cell.

max_events <- sum_by_rain_gridno %>% 
  group_by(GRID_NO) %>% 
  summarise(total_events=sum(`sum(INFECTION_EVENTS)`)) %>%
  filter(total_events==max(total_events))

joined.pres.norm <- joined.pres %>%
     bind_rows(data_frame(Location="EFSA grid cell[60126]",
             `EFSA infections`=max_events$total_events[1])) %>%



    dplyr::mutate(`EFSA infections` = `EFSA infections` / max(`EFSA infections`,na.rm=T)) %>%
     dplyr::mutate(`Magarey\nasco_days\naverage` = `Magarey\nasco_days\naverage` / max(`Magarey\nasco_days\naverage`,na.rm=T)) %>%
     mutate( 
         Location = reorder(Location,`EFSA infections`)) 

joined.pres.norm %>% gather(type,value,`EFSA infections`,`Magarey\nasco_days\naverage`) %>%
    select(Location,type,value) %>%
    mutate(type = ifelse(type == "EFSA infections","norm. EFSA inf. score",type),
           type = ifelse(type == "Magarey\nasco_days\naverage","norm. Magarey inf. score",type)) %>%
    ggplot() +
    geom_bar(aes(x=Location,fill=type,y=value),stat="identity",position="stack") +
    #geom_hline(yintercept = c(0.33,0.66),color="black",na.rm=T) +
    #geom_label(aes(x='Firenze',y=0.15,label="Low")) +
    #geom_label(aes(x='Firenze',y=0.48,label="Middle")) +
    #geom_label(aes(x='Firenze',y=0.81,label="High")) +

    ggplot2::coord_flip() 

The normalised risk scores where then encoded as relative risk scores 'high','middle','low'' by cutting them into 3 intervals of 0.33 and brought into a table form for easy comparison. These levels have no absolute meaning, but show the relative risk for a location inside the full numeric range of each risk score.

The following table shows the agreement or dissagreement between both papers on this relative risk level for 33 European Magarey locations.

Magarey locations with relative risk levels

The normalisation results in a table, which gives 2 relative risk levels for each Magaery2015 location.

labels=c('low','middle','high')
breaks <- length(labels)
levelsEfsa <- cut(joined.pres.norm$`EFSA infections`,breaks=breaks,labels=labels)
levelsMagarey <- cut(joined.pres.norm$`Magarey\nasco_days\naverage`,breaks=breaks,labels=labels)
levels <- data_frame(`Location`=joined.pres.norm$Location,
                    `EFSA2014 relativ risk level`=levelsEfsa,
                    `Magarey2015 relative risk level`=levelsMagarey) %>%
    na.omit()

    pander::pander(levels,
                   caption="Table of the 36 Magarey locations with their relative risk levels from the EFSA2014 opinion and the Magarey 2015 paper.")

Crosstable of Efsa2014 and Magarey2015 relative risk levels

Both papers agree on the relative risk level for 54 % of the grid cells. For these 19 grid cells, the use of Magarey data is not expected to result in a significant change of the EFSA2014 calculated CBS risk.

So the potential impact of the Magarey data is reduced to 0.53 % of all grid cells.

pander::pander(table(levelsEfsa,levelsMagarey),
               caption="Table showing the agreement/disagreement between the relative risk levels from EFSA2014 and Magarey2015")
                                        #library(irr)
                                        #lvs <- data.frame(efsa=levelsEfsa,mag=levelsMagarey) %>%
                                        #na.omit()
                                        #kappa2(lvs)
                                        #agree(lvs)

statistical analysis EFSA2014 and Magarey ascospore infection scores

Plot of data

loess model

    ggplot(joined.pres,aes(x=`EFSA infections`, y=`Magarey\nasco_days\naverage`)) + 
      geom_point() +
      geom_smooth() +
      ggtitle("Magarey et al. 2015 and EFSA PLH 2014 ascospore infection scores for the 33 Magarey et al. 2015 EU locations") +
      labs(x="EFSA PLH 2014 infection score",y="Magarey et al. 2015 infection score")

linear model

    ggplot(joined.pres,aes(x=`EFSA infections`, y=`Magarey\nasco_days\naverage`)) + 
      geom_point() +
      geom_smooth(method = "lm") +
      ggtitle("Magarey et al. 2015 and EFSA PLH 2014 ascospore infection scores for the 33 Magarey et al. 2015 EU locations") +
      labs(x="EFSA PLH 2014 infection score",y="Magarey et al. 2015 infection score")

Plot of ranks

loess model

        ggplot(joined.pres,aes(x=rank(`EFSA infections`), y=rank(`Magarey\nasco_days\naverage`))) +       geom_point() +
      geom_smooth() +
            ggtitle("Magarey et al. 2015 and EFSA PLH 2014 ranked ascospore infection scores for the 33 Magarey el al. EU locations") +
      labs(x="Ranked EFSA PLH 2014 infection score",y="Ranked Magarey et al. 2015 infection score")

linear model

        ggplot(joined.pres,aes(x=rank(`EFSA infections`), y=rank(`Magarey\nasco_days\naverage`))) +       geom_point() +
      geom_smooth(method = "lm") +
            ggtitle("Magarey et al. 2015 and EFSA PLH 2014 ranked ascospore infection scores for the 33 Magarey et al. 2015 EU locations") +
      labs(x="Ranked EFSA PLH 2014 infection score",y="Ranked Magarey et al. 2015 infection score")

3 statistical tests for correlation between the ranks

cor.test(joined.pres$`EFSA infections`, joined.pres$`Magarey\nasco_days\naverage`,method = "pearson")
cor.test(joined.pres$`EFSA infections`, joined.pres$`Magarey\nasco_days\naverage`,method = "kendal",exact = F)
cor.test(joined.pres$`EFSA infections`, joined.pres$`Magarey\nasco_days\naverage`,method = "spearman",exact = F)

session info

sessionInfo()


openefsa/efsa2016.00601 documentation built on May 24, 2019, 2:31 p.m.