library(dplyr)
library(zoo)
library(reshape2)
library(devtools)
library(lubridate)
### New Data ###
folder <- "../smosfiles/data-raw/fullyear/"
files = list.files(path = folder,
pattern = "*.csv")
filenames <- gsub("\\.csv.*","",files)
for (i in 1:length(files)){
assign(filenames[i],
read.csv(paste(folder, files[i], sep=''),header=TRUE)
)}
#Put All 30 Data Frames into a List
list_df = lapply(filenames, get)
#Make Generic Matrix to Fill in Missing Days
day <- seq(1,365)
year <- rep(2010,365)
c <- data.frame(day,year)
yplus <- c(2011, 2012, 2013, 2014, 2015, 2016)
for (j in 1:6){
year <- rep(yplus[j],365)
c <- rbind(c, data.frame(day,year))
}
pixfully <- vector("list", 30)
for (i in 1:30){
pix <- list_df[[i]]
pix <- pix %>% filter(year %in% c(2010, 2011, 2012, 2013, 2014, 2015, 2016))
pix$day <- as.integer(pix$doy)
pix$day[pix$day<1] <- 1
pix$tau[pix$tau<=0] <- NA
pix$tau[pix$tau=="NaN"] = NA
pixsub <- pix %>% group_by(year,day) %>% select(year,tau,day)
pixsub <- pixsub %>% group_by(year,day) %>% summarise(y = mean(tau,na.rm=TRUE))
pixsub$y[pixsub$y=="NaN"] = NA
pixfin <- left_join(c,pixsub,by=c("year"="year","day"="day"))
loc <- rep(pix$dgg[1],2555)
lat <- rep(pix$lat[1],2555)
lon <- rep(pix$lon[1],2555)
pixfin$loc <- loc
pixfin$lat <- lat
pixfin$lon <- lon
pixfully[[i]] <- pixfin
}
#Turn Into Data Objects
devtools::use_data(pixfully, overwrite = TRUE, compress="xz")
### Old Data Cleaning Process ###
folder <- "../smosfiles/data-raw/raw/"
files = list.files(path = folder,
pattern = "*.csv")
filenames <- gsub("\\.csv.*","",files)
for (i in 1:length(files)){
assign(filenames[i],
read.csv(paste(folder, files[i], sep=''),header=TRUE)
)}
list_df = lapply(filenames, get)
#Get a List of Pixel Names and Grab All 180 files
seqpix <- seq(1,180,by=6)
namespix = filenames[seqpix]
spl = strsplit(namespix, "_",fixed=TRUE)
namesfin = unlist(lapply(spl,FUN=function(x){paste(x[1],x[2],sep="_")}))
list_df = lapply(filenames, get)
#L1 Cleaning: For Double Counted Times, keep the L2 Version of Tau
ls <- NULL #Count the number of Duplicates We Are Removing
nodupsdat <- list()
for(i in 1:180){
tmp <- list_df[[i]] %>% add_rownames()
dups <- tmp %>% group_by(doy) %>% filter(n()>=2) %>% filter(L1Cv==1) %>% filter(tau>0)
index <- as.integer(dups$rowname)
ls <- append(ls,index)
if (length(index)>0)
{nodupsdat[[i]] <- tmp[-index,]}
else{
nodupsdat[[i]] <- tmp
}
}
#Group Files by Location. Return a List of 30 Pixels
locpix <- list()
for (i in 0:29){
lower = 1+i*6
upper = 6+i*6
locpix[[i+1]] <- lapply(nodupsdat[lower:upper], as.data.frame)
}
#Make DataFrame with Locations of Pixels
maps2=lapply(locpix,"[[", 1,drop=FALSE)
maps3=lapply(maps2, "[", 1,c(3:5))
anna = do.call(rbind.data.frame, maps3)
#Add Column of Integer Dates
intdat <- vector("list", length(locpix))
for (j in 1:30){
intdat[[j]] <- lapply(locpix[[j]],function(x) transform(x, doyint = as.integer(x[,2])))
}
#Return Back to 180 Files
unlistdat <- unlist(intdat, recursive = FALSE)
#Drop Obs with Tau<=0
Tpos <- vector("list", length(unlistdat))
for(i in 1:180){
Tpos[[i]] <- unlistdat[[i]][unlistdat[[i]]$tau>0,]
}
#Make 2 Lists of 30 pixels: 1 with Filter for Corrupted Signal Filter, and One Without
#Remove Obs if either IRRFI or Chi_2P > .05. Very strict threshold
noisefree <- list()
rm <- NULL
for (i in 1:180){
noisefree[[i]] <- subset(Tpos[[i]],IRRFI <= .05 & Chi_2P <= .05)
diff <- nrow(Tpos[[i]]) - nrow(sub)
rm <- append(rm, diff)
}
#Add Indicator variable for Noise Obs
noiseind <- list()
for (i in 1:180){
noiseind[[i]] <- Tpos[[i]] %>% mutate(corrupt = ifelse(IRRFI>=.05 | Chi_2P >=.05,1,0))
}
#Average Tau Over duplicate Days
Nodup <- lapply(Tpos, function(x) aggregate(tau~doyint,data=x,FUN = mean))
#Aggregate Over Indicator for Noise
FlagIndicator <- lapply(noiseind, function(x) aggregate(corrupt~doyint,data=x,FUN = mean))
#If Multiple Obs Per Interger Day (corrupt=.5), round down to zero
FlagIndicator<-lapply(FlagIndicator,function(x) as.data.frame(cbind(x[,1],ifelse(x[,2]<1,0,1))))
#Turn all 180 files to Zoo Time Series Objects
pix.zoo <- lapply(Nodup, read.zoo, index=1)
pix.zooInd <- lapply(FlagIndicator, read.zoo, index=1)
#print(pix.zoo[[1]],style = "vertical")
#Use Zoo to Fill In Gaps so All Years Have Same Index
wide <- list()
for (i in 0:29){
lower <- 1+i*6
upper <- 6+i*6
l1 <- pix.zoo[lower:upper]
unionl1 <- Reduce(merge,l1)
g <- zoo(,seq(start(unionl1),end(unionl1)))
z <- merge(unionl1,g)
wide[[i+1]] <- z
}
wideNF <- list()
for (i in 0:29){
lower <- 1+i*6
upper <- 6+i*6
l1 <- pix.zooInd[lower:upper]
unionl1 <- Reduce(merge,l1)
g <- zoo(,seq(start(unionl1),end(unionl1)))
z <- merge(unionl1,g)
wideNF[[i+1]] <- z
}
#Note to Self: Let's Try and Use lapply more rather than loops.
wideNF <- lapply(wideNF, function (x) {
names(x) <- c("2010","2011", "2012","2013","2014","2015")
x
})
wide <- lapply(wide, function (x) {
names(x) <- c("2010","2011", "2012","2013","2014","2015")
x
})
#Add Index Column. Note: NF= Noise Filter
wideNF <- lapply(wideNF, function (x) {
x <- add_rownames(as.data.frame(x),"VALUE")
x
})
wide <- lapply(wide, function (x) {
x <- add_rownames(as.data.frame(x),"VALUE")
x
})
#Truncated Year's Worth of Data
notfullyear <- wide
#Split 275 & 276 Days
pixall <- seq(1,30,1)
xday <- c(5,7,8,10,12,17,18,23,25)
dayplus <- pixall[-xday]
#Fill in NA to Make a Full Year for 276 Day Years
for (j in dayplus){
for (i in 58:1){
wide[[j]] <- rbind(c(i,NA,NA,NA,NA,NA,NA),wide[[j]])
}
for (i in 335:365){
wide[[j]] <- rbind(wide[[j]],c(i,NA,NA,NA,NA,NA,NA))
}
}
#Fill in NA to Make a Full Year for 275 Day Years
for (j in xday){
for (i in 59:1){
wide[[j]] <- rbind(c(i,NA,NA,NA,NA,NA,NA),wide[[j]])
}
for (i in 335:365){
wide[[j]] <- rbind(wide[[j]],c(i,NA,NA,NA,NA,NA,NA))
}
}
#Switch Data from Wide to Long Format
pix.long <- vector("list", length(wide))
pix.short <- vector("list", length(notfullyear))
pix.shortNF <- vector("list", length(wideNF))
for (i in 1:30){
pix.long[[i]] <- melt(wide[[i]],id.vars="VALUE",variable.name="year",value.name="T")
names(pix.long[[i]])[names(pix.long[[i]])=="VALUE"] <- "day"
pix.short[[i]] <- melt(notfullyear[[i]],id.vars="VALUE",variable.name="year",value.name="T")
names(pix.short[[i]])[names(pix.short[[i]])=="VALUE"] <- "day"
pix.shortNF[[i]] <- melt(wideNF[[i]],id.vars="VALUE",variable.name="year",value.name="T")
names(pix.shortNF[[i]])[names(pix.shortNF[[i]])=="VALUE"] <- "day"
}
#Check Lengths of Time Series
s.length=sapply(pix.short, function (x) length(x$T))
l.length=sapply(pix.long, function (x) length(x$T))
table(s.length)
table(l.length)
#Add Day Index Column
for (i in 1:30){
pix.long[[i]]$cumday <- seq.int(nrow(pix.long[[i]]))
pix.short[[i]]$cumday <- seq.int(nrow(pix.short[[i]]))
pix.shortNF[[i]]$cumday <- seq.int(nrow(pix.shortNF[[i]]))
}
#Turn Into Data Objects
devtools::use_data(pix.shortNF, overwrite = TRUE, compress="xz")
devtools::use_data(pix.short, overwrite = TRUE, compress = "xz")
devtools::use_data(pix.long, overwrite = TRUE, compress="xz")
#Clean GDD Data
folder <- "../smosfiles/data-raw/GDD/"
files = list.files(path = folder,
pattern = "*.csv")
filenames <- gsub("\\.csv.*","",files)
for (i in 1:length(files)){
assign(filenames[i],
read.csv(paste(folder, files[i], sep=''),header=TRUE)
)}
#Collect DataFrames into a List
list_df = lapply(filenames, get)
list_df<-lapply(
list_df,
function(x){
x$daily_low_f[x$daily_low_f<50] <- 50
x$daily_high_f[x$daily_high_f>86] <- 86
x$daily_high_f[x$daily_high_f<50] <- 50
x$gdd <- ((x$daily_high_f+x$daily_low_f)/2)-50
return(x)
}
)
#Make Year, Month, Day Seperate Columns; and add cumulative day index.
#Note 2012 was a Leap Year.
list_df <- lapply(
list_df,
function(x){
x$date2 <- ymd(x$date)
x$year <- year(x$date2)
x$month <- month(x$date2)
x$dat <- day(x$date2)
x$cumday <- seq.int(nrow(x))
return(x)
}
)
#Grab One Pixel, 194406
pix1 <- list_df[[1]]
#Assume crops are in ground May 1 for all years
pix1 <- pix1 %>%
mutate(planted = ifelse(date2 >"2010-05-01" & date2 <= "2010-12-31", 1,
ifelse(date2 > "2011-05-01" & date2 <= "2011-12-31", 1,
ifelse(date2 > "2012-05-01" & date2 <= "2012-12-31", 1,
ifelse(date2 > "2013-05-01" & date2 <= "2013-12-31", 1,
ifelse(date2 > "2014-05-01" & date2 <= "2014-12-31", 1,
ifelse(date2 > "2015-05-01" & date2 <= "2015-12-31", 1, 0)))))
)
)
#Not Sure Why this Isn't running cum sum over subset
#pix1 <- pix1 %>% group_by(year,planted) %>% mutate(out = ifelse(planted==1,cumsum(gdd),0))
pix1 <- ddply(pix1, .(year,planted), transform, Cumulative.Sum = cumsum(gdd))
pix1 <- pix1 %>% mutate(Cumulative.Sum = ifelse(planted==1,Cumulative.Sum,0))
#Remove Leap Year Day, Row 790
pix1 <- pix1[-790,]
#Join GDD with Tau Data
d <- smosfiles::pix.long
#Note: Not sure How to String Into One Long Series Given GDD aren't uniform acorss years.
pix1tau <- pix.long[[1]]
#Combine GDD Data Frame and Tau Data Frame for One Pixel
combo <- cbind(pix1,pix1tau)
#Get Valid Column Names
valid_column_names <- make.names(names=names(combo), unique=TRUE, allow_ = TRUE)
names(combo) <- valid_column_names
par(mfrow=c(3,2))
years <- seq(2010, 2015, by=1)
for (i in years){
combo10 <- combo %>% filter(year==i) %>% select(cumday,year, Cumulative.Sum, day, date, T)
#plot(combo10$Cumulative.Sum,combo10$T,ylab="T",xlab="GDD",main=i)
plot(combo10$day,combo10$T,ylab="T",xlab="Day",main=i)
}
plot(combo10$day,combo10$T)
plot(combo10$Cumulative.Sum,combo10$day,xlab="GDD",ylab="Day")
plot(combo10$day,combo10$Cumulative.Sum,xlab="Day",ylab="GDD")
plot(pix1tau$cumday,pix1tau$T)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.