#' @title readIndiaAPY
#' This function reads foodcrops data for India taken from here: https://eands.dacnet.nic.in/APY_96_To_07.htm
#' @author Anastasis Giannousakis
#' @param subtype Area, Yield, or Production
#' @importFrom readxl read_excel excel_sheets
#' @importFrom tidyr gather
#' @importFrom dplyr filter %>%
#' @importFrom magclass as.magpie getRegions<-
#' @importFrom madrat downloadSource
#' @examples
#' \dontrun{ a <- madrat::readSource(type="IndiaAPY",subtype="Rice",convert="onlycorrect") }
#' @return magpie object containing Area, Yield, and Production data.
readIndiaAPY <- function(subtype=NA){
# helper function to convert data from wide to long format and specify variable and unit
.gather <- function(x, varunit, season=FALSE){
fct<- 1
if(season) fct<-2
d <- gather(x, "year", "value", colnames(x)[-(1:fct)], factor_key = TRUE) # convert to long format
# d <- d[-c(grep("State", d[,1]), which(is.na(d[, 1]))), ] # remove irrelevant rows
varname <- strsplit(varunit, " \\(")[[1]][[1]] # split variable-unit
unit <- gsub(" |\\)|\\'", "", strsplit(varunit, " \\(")[[1]][[2]])
unit <- sub("000", "k", unit)
out <- cbind("variable"=varname, "unit"=unit, "season"=NA, d)
if (season) out <- cbind("variable"=varname, "unit"=unit, d)
return(out)
}
# helper function to prepare data from excel files for conversion to data.frame
.fixdata <- function(a){
# are the data of type R (with seasons in the 2nd column) or type W (without)?
if (grepl("Summer|Kharif|Rabi|Total",a[,2])) {
type1 <- "R"
} else {
type1 <- "W"
}
if (type1 == "W") {
if (any(grepl("Kharif|Winter|Autumn|Rabi|Summer",names(a)))) {
season<-tolower(strsplit(
strsplit(
grep("Kharif|Winter|Autumn|Rabi|Summer",names(a),value = TRUE),
split="\\(")[[1]][2],
split="\\)")[[1]][1])
} else {
season <- NA
}
if (!grepl(".*.[0-9]-[0-9].*.",a[3,3])) a <- a[-1,]
colnames(a)<-sub("-.*","",a[3, ]) # use row with years as column names
a <- a[-which(is.na(a[, 1])),]
colnames(a)[1] <- "state"
# remove remaining rows that do not contain data
if (grepl("1",a[,1])) a <- a[-which(a[, 1]==1),]
if (length(grep("State",a[,1]))==2) a <- a[-grep("State",a[,1])[[2]],]
} else {
if (!grepl(".*.[0-9]-[0-9].*.",a[2,3])) a <- a[-1,]
if (!grepl(".*.[0-9]-[0-9].*.",a[2,3])) a <- a[-1,]
colnames(a)<-sub("-.*","",a[2,]) # use row with years as column names
a<-a[-intersect(which(is.na(a[,2])),which(is.na(a[,1]))),]
colnames(a)[1:2]<-c("state", "season")
for (i in 1:length(t(a[, 1]))){
if(is.na(a[[i, 1]])) a[[i, 1]]<-a[[i-1, 1]]
}
if (grepl("1",a[,1])) a <- a[-which(a[, 1]==1),] # remove remaining rows that do not contain data
}
ind <- length(which(grepl("[0-9]", colnames(a)))) / 3 # find length of each data table
if (type1 == "R") {
a <- as.data.frame(a)
# remove remaining rows that do not contain data
if (length(grep("State",a[,1]))==2) a <- a[-grep("State",a[,1])[[2]],]
if (length(grep("Estimates",a[,1]))>0) a <- a[-grep("Estimates",a[,1]),]
a[,"season"] <- tolower(a[,"season"])
# remove rows with no data that begin with a season name (generated by readxl in some files)
if (any(grepl("Kharif|Winter|Autumn|Rabi|Summer",a[,1]))) a <- a[-which(is.na(a[, 2])), ]
out<-rbind(
.gather(cbind(a[-1,1:2], a[-1,(3+0*ind):(2+1*ind)]), "Area ( 000 Hektares)", TRUE),
.gather(cbind(a[-1,1:2], a[-1,(3+1*ind):(2+2*ind)]), "Production ( 000 Tonnes)", TRUE)
)
} else {
out <- rbind(
.gather(cbind(a[-1,1], a[-1,(2+0*ind):(1+1*ind)]), "Area ( 000 Hektares)"),
.gather(cbind(a[-1,1], a[-1,(2+1*ind):(1+2*ind)]), "Production ( 000 Tonnes)")
)
out[,"season"] <- season
}
return(out)
}
crop <- NULL
if (file.exists("crops.txt")) {
crops <- readLines("crops.txt")
} else {
stop("please download the data with madrat::downloadSource first")
}
excelfiles <- dir()[grep("xls", dir())] # read-in only excel files
out <- NULL
for (i in crops) {
for (j in grep(i, excelfiles, value = TRUE)) {
suppressMessages(a <- read_excel(j))
tmp <- cbind("crop" = i, .fixdata(a))
out <- rbind(out, tmp)
}
}
dtfile <- "allfood1996-2013.xls"
if (!file.exists(dtfile)) downloadSource("IndiaAPY" , overwrite = TRUE)
for (i in crops) {
suppressMessages(a <- read_excel(dtfile, sheet = grep(i, excel_sheets(dtfile), value = TRUE, ignore.case = TRUE)[1]))
a[,(grep("State",a)[[2]]-1):length(a[1,])] <- NULL # remove section with 5-year average
out <- rbind(out, cbind("crop" = i, .fixdata(a)))
}
dtfile <- "allfood2014-2018.xls"
if (!file.exists(dtfile)) downloadSource("IndiaAPY" , overwrite = TRUE)
for (i in crops) {
suppressMessages(a <- read_excel(dtfile, sheet = grep(i, excel_sheets(dtfile), value = TRUE, ignore.case = TRUE)[1]))
out <- rbind(out, cbind("crop" = i, .fixdata(a)))
}
out <- as.data.frame(out)
# Convert data column to numeric
suppressWarnings(out[["value"]] <- as.numeric(out[["value"]]))
# If applicable, filter out specific crops
if (!is.na(subtype)) out <- filter(out,`crop`==subtype)
out <- as.magpie(out, spatial="state")
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.