#' Fill missing observation years in individual phenology timelines
#'
#' In the full dataset, for each individual, observation years without
#' phenophase events observed (leaf event, flowering, fruiting, seed
#' dispersal) had been removed as missing. As a result, some timelines of
#' individuals do not have consecutive dates. Here, these missing dates are
#' added back with a phenological value of `NA`, within the
#' individual-specific start and end years.
#'
#' There is an option to fill small gaps with a phenological value of zero
#' instead of `NA`, based on the length of the gap: it is not unusual for
#' years without phenophase events to exist, so such years can be assumed to
#' have been observed but without events. To remain cautious, you select the
#' maximum number of consecutive years without phenophase events that may be
#' filled (e.g. 2 years). Longer gaps keep `NA`.
#'
#' @param data junglerhythms data file. Must contain the columns
#'   `species_full`, `phenophase`, `id`, `year`, `week` and `value`.
#' @param species_name list of species. Species without any matching rows
#'   in `data` contribute no rows to the output.
#' @param pheno only one phenophase
#' @param gapfill_missingyears maximum number of consecutive years without
#'   occurred events that are assumed to be observed and are filled with zero
#' @export
#' @return datasets with full timelines at the individual level. Compared to
#'   the input, the columns `date`, `missing_years_consec` (identifier of the
#'   run of consecutive missing years) and `lgh_consec_years` (length of that
#'   run) are added; the latter two are `NA` for years that were present in
#'   the input. `year` is returned as a character column.
missing_years <- function(
    data = data,
    species_name = "Afzelia bipindensis",
    pheno = "leaf_turnover",
    gapfill_missingyears = 2) {
  # the default `data = data` is self-referential and cannot be evaluated;
  # fail with a readable message instead of a promise-recursion error
  if (missing(data)) {
    stop("`data` must be supplied.", call. = FALSE)
  }
  # a vector of phenophases would be silently recycled into the output
  if (length(pheno) != 1L) {
    stop("`pheno` must be a single phenophase.", call. = FALSE)
  }
  required_cols <- c("species_full", "phenophase", "id", "year", "week", "value")
  absent_cols <- setdiff(required_cols, names(data))
  if (length(absent_cols) > 0) {
    stop(
      "`data` is missing column(s): ", paste(absent_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # day of year assigned to each of the 48 observation "weeks" of a year
  week_days <- round((1:48 * 7.6) - 7)

  # Grow the timeline of one individual to its full min-max year range and
  # fill short runs of missing years with zero.
  fill_individual <- function(data_ind) {
    # years present in the reduced dataset
    initial_years <- unique(data_ind$year)

    # full grid of observation dates between the first and last year
    year_range <- min(data_ind$year):max(data_ind$year)
    dates_full <- data.frame(
      date = as.Date(
        paste(
          rep(year_range, each = length(week_days)),
          rep(week_days, times = length(year_range)),
          sep = "-"
        ),
        format = "%Y-%j"
      )
    )
    data_ind <- merge(data_ind, dates_full, by = "date", all.y = TRUE)

    # fill species name, phenophase, id and year in the added (empty) rows
    data_ind$species_full <-
      unique(data_ind$species_full[!is.na(data_ind$species_full)])
    data_ind$phenophase <-
      unique(data_ind$phenophase[!is.na(data_ind$phenophase)])
    data_ind$id <- unique(data_ind$id[!is.na(data_ind$id)])
    data_ind$year <- format(data_ind$date, "%Y")

    # derive the week of added rows from their date rather than from the row
    # position, so the result stays correct if dates are duplicated
    week_unknown <- is.na(data_ind$week)
    data_ind$week[week_unknown] <- match(
      as.integer(format(data_ind$date[week_unknown], "%j")),
      week_days
    )

    # years of the full range that were absent from the initial dataset
    full_range_years <- as.numeric(unique(data_ind$year))
    gap_years <- setdiff(full_range_years, initial_years)

    if (length(gap_years) > 0) {
      # label runs of consecutive missing years and record each run's length
      run_id <- cumsum(c(1, abs(diff(gap_years)) > 1))
      gaps <- data.frame(
        year = as.character(gap_years),
        missing_years_consec = run_id,
        lgh_consec_years = tabulate(run_id)[run_id]
      )
      data_ind <- merge(data_ind, gaps, by = "year", all.x = TRUE)

      # short gaps are assumed observed without events -> zero;
      # longer gaps keep NA
      fillable <- is.na(data_ind$value) &
        !is.na(data_ind$lgh_consec_years) &
        data_ind$lgh_consec_years <= gapfill_missingyears
      data_ind$value[fillable] <- 0
    } else {
      # typed NA (not the string "NA") keeps these columns numeric when the
      # individuals are row-bound together
      data_ind$missing_years_consec <- NA_real_
      data_ind$lgh_consec_years <- NA_integer_
    }

    # merge() does not guarantee row order within a key; restore chronology
    data_ind <- data_ind[order(data_ind$date), , drop = FALSE]
    rownames(data_ind) <- NULL
    data_ind
  }

  species_parts <- vector("list", length(species_name))
  for (j in seq_along(species_name)) {
    keep <- data$species_full %in% species_name[j] &
      data$phenophase %in% pheno
    data_subset <- data[keep, , drop = FALSE]

    # convert year + week to a date
    data_subset$date <- as.Date(
      paste(data_subset$year, round((data_subset$week * 7.6) - 7), sep = "-"),
      format = "%Y-%j"
    )
    # sort dataframe according to date
    data_subset <- data_subset[order(data_subset$date), , drop = FALSE]

    #---
    # grow dataset to full range:
    # gap filling depends on the length of each run of missing years, so it
    # needs to be done at ID level
    #---
    individuals <- unique(data_subset$id)
    individual_parts <- lapply(individuals, function(ind) {
      fill_individual(data_subset[data_subset$id %in% ind, , drop = FALSE])
    })
    data_grow <- if (length(individual_parts) > 0) {
      do.call(rbind, individual_parts)
    } else {
      data.frame()
    }

    # #-----------------------------
    # # overview plots (from Bush)
    # #-----------------------------
    # # timeseries plot of raw data for each individual
    # grown_plots <- ggplot(data_grow,
    #                       aes(x = date,
    #                           y = value))+
    #   geom_line()+
    #   scale_y_continuous(labels=NULL)+
    #   ylab("Observations") +
    #   xlab("") +
    #   # ggtitle(paste(species_name[j], "-", pheno)) + #species_name[j]
    #   theme_light(base_size = 14) +
    #   theme(legend.position = "none") +
    #   facet_wrap(~id,
    #              ncol = 1,
    #              strip.position = "left")
    #
    # plot(grown_plots)

    species_parts[[j]] <- data_grow
  }

  if (length(species_parts) == 0) {
    return(data.frame())
  }
  do.call(rbind, species_parts)
}
# firsttest <- two_year_gaps(data,
# species_name = c("Albizia adianthifolia","Allanblackia floribunda","Afrostyrax lepidophyllus"), #"Erythrophleum suaveolens",Irvingia grandifolia, Pericopsis elata
# pheno = "leaf_turnover") #flowers, leaf_turnover, leaf_dormancy, fruit, fruit_drop
#
# data$date <- as.Date(paste(data$year,
# round((data$week*7.6)-7),sep="-"), "%Y-%j")
# alb1 <- data %>%
# filter(species_full %in% "Albizia adianthifolia") %>%
# filter(phenophase %in% "leaf_turnover") %>%
# group_by(date) %>%
# dplyr::summarise(mean_value = mean(value))
# ggplot(alb1,
# aes(x = date,
# y = mean_value))+
# geom_line()+
# scale_y_continuous(labels=NULL)+
# ylab("Observations") +
# xlab("") +
# theme_light(base_size = 14) +
# theme(legend.position = "none")
#
# alb2 <- firsttest %>%
# filter(species_full %in% "Albizia adianthifolia") %>%
# filter(phenophase %in% "leaf_turnover") %>%
# group_by(date) %>%
# dplyr::summarise(mean_value = mean(value))
# ggplot(alb2,
# aes(x = date,
# y = mean_value))+
# geom_line()+
# scale_y_continuous(labels=NULL)+
# ylab("Observations") +
# xlab("") +
# theme_light(base_size = 14) +
# theme(legend.position = "none")
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.