# Options knitr::opts_chunk$set(results='asis', echo=FALSE, warning=FALSE) # Packages # library(knitr) # library(DT) # library(plotly) # library(ggplot2) # library(dplyr) # Data files strFile <- file.path(dir_data, file_data) data.import <- read.csv(strFile)
# Report Info myReportDate <- format(Sys.Date(),ContData.env$myFormat.Date) cat(paste("**Report Date:** ",myReportDate,"\n\n",sep="")) myUser <- Sys.getenv("USERNAME") cat(paste("**Generated By:** ",myUser,"\n\n",sep="")) #filename cat("**Filename:** ",file_data,"\n\n",sep="") mySiteID <- data.import[1,ContData.env$myName.SiteID] #cat(paste("**SiteID:** ",mySiteID,"\n\n",sep="")) myNumRecords <- nrow(data.import) # 20170228, mod from records 10 and 11 to half way point # myTimeDiff <- difftime(data.import[10,ContData.env$myName.DateTime] #,data.import[11,ContData.env$myName.DateTime],units="mins") #x <- data.import[,ContData.env$myName.DateTime] myT <- strptime(data.import[,ContData.env$myName.DateTime] ,format=ContData.env$myFormat.DateTime) myTimeDiff.all <- difftime(myT[-1],myT[-length(myT)],units="mins") myTimeDiff <- median(as.vector(myTimeDiff.all),na.rm=TRUE) cat(paste("\n\n**Period of Record, Actual:** " ,min(data.import[,ContData.env$myName.Date]) ," to " ,max(data.import[,ContData.env$myName.Date]) ,"\n\n" ,sep="")) cat(paste("**Recording Interval (estimated):** " ,myTimeDiff[1] ," minutes\n\n" ,sep="")) myParameters.ALL <- ContData.env$myNames.DataFields[ContData.env$myNames.DataFields %in% names(data.import)==TRUE] myParameters.Lab.ALL <- ContData.env$myNames.DataFields.Lab[ContData.env$myNames.DataFields %in% names(data.import)==TRUE] # Filter out Discrete myParameters <- myParameters.ALL[!grepl(ContData.env$myPrefix.Discrete,myParameters.ALL)] myParameters.Lab <- myParameters.Lab.ALL[ !grepl(ContData.env$myPrefix.Discrete ,myParameters.Lab.ALL)] #cat("**Parameters:** ",paste(myParameters.Lab,", ",sep=""),"\n\n",sep="") # above line not working, preventing pandoc conversion to WORD #cat("**Included Parameters:** \n\n") cat("**Parameters Included:** ",paste(myParameters.ALL ,", " ,sep="") ,"\n\n" ,sep="")
# Plot of range of data df_plot <- unique(data.import[, ContData.env$myName.Date, FALSE]) names(df_plot) <- "Date" df_plot[, "Date"] <- as.Date(df_plot[, "Date"]) # julian dates df_plot$Year <- format(df_plot[, "Date"], "%Y") df_plot$j <- as.numeric(format(df_plot[, "Date"], "%j")) df_plot$Date_Plot <- as.Date(as.numeric(format(df_plot[, "Date"] , "%j")) - 1 , origin = as.Date("2004-01-01")) # plot p_years <- ggplot2::ggplot(data = df_plot , ggplot2::aes(y = Year)) + ggplot2::scale_x_date(date_labels = "%b%d" , limits = as.Date(c("2004-01-01", "2004-12-31")) , date_breaks = "1 month" , expand = c(0, 0)) + ggplot2::labs(x = "Date" , y = "Year" , title = "Data by Year and Dates") + ggplot2::geom_line(ggplot2::aes(x = Date_Plot , y = Year) , size = 2 , na.rm = TRUE) + ggplot2::theme_bw() p_years cat("\n\n") # Convert time interval (minutes) to number per day records.expected <- round(24*60/as.numeric(myTimeDiff[1]),1) cat(paste("Estimated number of records per day is ",records.expected,".",sep="")) cat("\n\n") cat("Number of records by year and month (with totals).") cat("\n") myTable <- addmargins(table(data.import[,ContData.env$myName.Yr] ,data.import[,ContData.env$myName.Mo] , useNA = "ifany")) print(knitr::kable(myTable, format="markdown", row.names=TRUE)) cat("\n\n") # revert to longer table (20170228) cat("Number of records by day and month (with totals).") cat("\n") myTable <- addmargins(table(data.import[,ContData.env$myName.Day] ,data.import[,ContData.env$myName.Mo] , useNA = "ifany")) print(knitr::kable(myTable, format="markdown", row.names=TRUE)) cat("\n\n") # # pandoc.table(myTable.month.day.rec.GT15,style="rmarkdown") # # myTable.month.day.rec <- table(data.import[,ContData.env$myName.Mo] ,data.import[,ContData.env$myName.Day] , useNA = "ifany") # use apply function to count number of records not equal to the expected value cat("\n\n") # myTable.DateTime.N <- aggregate(data.import[,ContData.env$myName.SiteID] ~ data.import[,ContData.env$myName.DateTime] , data=data.import,FUN=length) # myTable <- myTable.DateTime.N[myTable.DateTime.N[,ContData.env$myName.SiteID]!=1,] # cat(paste0("Date and time records with more than one entry; N=",nrow(myTable),". Duplicate date.time records are shown below.")) # cat("\n") # print(knitr::kable(myTable, format="markdown", row.names=TRUE)) # mask error, 20170307 # Missing Dates myDateRange.Data <- seq(as.Date(min(data.import[,ContData.env$myName.Date])) ,as.Date(max(data.import[,ContData.env$myName.Date])) ,by="day") if(exists("fun.myData.DateRange.Start")==TRUE){ myDateRange.Target <- seq(as.Date(fun.myData.DateRange.Start) ,as.Date(fun.myData.DateRange.End) ,by="day") } else { myDateRange.Target <- myDateRange.Data } myDateRange.Target.Missing <- myDateRange.Target[!myDateRange.Target %in% myDateRange.Data] cat(paste0("There are " , length(myDateRange.Target.Missing) , "dates with missing data between the min (" , min(myDateRange.Data) , ") and max(" , max(myDateRange.Data) , ") for the provided data. \n\n" , "Below are the dates:")) cat("\n\n") print(ifelse(length(myDateRange.Target.Missing)==0,"",myDateRange.Target.Missing))
cat(paste("* **",ContData.env$myFlagVal.Pass,"** = Pass,",sep="")) cat("\n") cat(paste("* **",ContData.env$myFlagVal.Suspect,"** = Suspect,",sep="")) cat("\n") cat(paste("* **",ContData.env$myFlagVal.Fail,"** = Fail,",sep="")) cat("\n") cat(paste("* **",ContData.env$myFlagVal.NoData,"** = No Data or Not Applicable (NA).",sep=""))
Overall flags are assigned by examining the results of the four tests below.
cat(paste("* **",ContData.env$myFlagVal.Pass,"** = no Fail or Suspect and at least one Pass,",sep="")) cat("\n") cat(paste("* **",ContData.env$myFlagVal.Suspect,"** = no Fail and at least one Suspect,",sep="")) cat("\n") cat(paste("* **",ContData.env$myFlagVal.Fail,"** = at least one Fail,",sep="")) cat("\n") cat(paste("* **",ContData.env$myFlagVal.NoData,"** = all tests were Missing Data.",sep=""))
# Overall flags by paramter # (maybe combine into a single table) # skip for now # duplicated in by parameter section for missing data # # # for (j in 1:length(myParameters)) {##FOR.j.START # # # # #j.num <- match(j,myParameters) # # cat(paste(myParameters.Lab[j.num],"/n/n",sep="")) # # # ISSUE WITH knitr::kable INSIDE OF LOOPS # # cat(paste("## ",myParameters[j],"\n\n",sep="")) # colnum <- match(paste("Flag",myParameters[j],sep="."),names(data.import)) # # #print(myTable.Flags.Overall <- ftable(data.import[,colnum])) # myTable <- addmargins(table(data.import[,colnum] # , useNA = "ifany") # ,1) # print(knitr::kable(t(as.matrix(myTable)), format = "markdown")) # # future mod, 1,3,4,9 to Pass, Suspect, Fail, NA # # exclude=NA to get all # # future mod, 2nd line in table to include Percent of row total # # future mod, sort levels # # cat("\n\n") # # }##FOR.j.END
Number of expected values is estimated but this assumes equal sampling effort in all cases. This may not be the case for many data files.
# 3.1. Number of Records (revised 20170228) # Convert time interval (minutes) to number per day records.expected <- round(24*60/as.numeric(myTimeDiff[1]),1) cat(paste("The estimated number of records per day is ",records.expected,".",sep="")) cat("\n\n")
The number of existing values will be shown in a heat map and the number of dates with not the expected number of records is shown in a table.
# DT::datatable and plotly objects can only have one per chunk # this is why they don't render when generate them in a loop # 3. Data Info, by Parameter # Individual Data Type Report # QC i <- myParameters[1] for (i in myParameters) {##FOR.i.START # # i.num <- match(i,myParameters) myTitle.Sub <- myParameters.Lab[i.num] i.flag <- paste(ContData.env$myName.Flag, i, sep = ".") cat("## ",i,"\n\n",sep="") #cat(paste("**QC TESTS,",myTitle.Sub,sep="")) # 3.1. Flags, overall #cat(myTitle.Sub <- "### Flags") cat("\n\n") cat("#### Flag summary, overall number of records.") cat("\n\n") myTable <- addmargins(table(data.import[, i.flag] , useNA = "ifany") ,1) print(knitr::kable(t(as.matrix(myTable)), format = "markdown")) cat("\n\n") # # cat("Number of records by year and month (with totals)") cat("\n") myTable <- addmargins(table(data.import[,ContData.env$myName.Yr] ,data.import[,ContData.env$myName.Mo] , useNA = "ifany")) print(knitr::kable(myTable, format="markdown", row.names=TRUE)) cat("\n\n") # df_base <- data.import %>% dplyr::filter(!!as.name(i.flag) == "P") %>% dplyr::group_by(!!as.name(ContData.env$myName.MoDa) , !!as.name(ContData.env$myName.Yr)) %>% dplyr::summarise(n = dplyr::n(), .groups = "drop") # munge df_base$MonthDay <- sprintf("%04d", df_base$MonthDay) # change class df_base$MonthDay <- as.character(df_base$MonthDay) df_base$Year <- as.character(df_base$Year) # Plot, heat map str_title_hm <- paste0("Records with 'P'assing flags, ", mySiteID, ", ", i) p_hm <- ggplot2::ggplot(df_base , ggplot2::aes(x = !!as.name(ContData.env$myName.Yr) , y = !!as.name(ContData.env$myName.MoDa) , fill = n)) + ggplot2::geom_tile() + ggplot2::scale_fill_gradient(low = "orange", high = "dark green") + ggplot2::labs(title = str_title_hm) + ggplot2::scale_y_discrete(limits = rev) # display as plotly print(p_hm) #plotly::ggplotly(p_hm) # print(p_hm_ly) # without plotly use #scale_y_discrete( guide = ggplot2::guide_axis(check.overlap = TRUE)) cat("\n\n") # table of dates with missing data df_missing <- df_base %>% dplyr::filter(n != records.expected) %>% dplyr::select(Year, MonthDay, n) %>% dplyr::arrange(Year, MonthDay) cat <- paste0("Dates with Potential Missing Records (expected = " , records.expected , ").") # print(DT::datatable(df_missing # , filter = "top" # , caption = cat # , options = list(scrollX = TRUE # , scrollY = TRUE # , pageLength = 30 # , lengthMenu = c(15, 30, 60, 90, 120, 180, 366) # , autoWidth = TRUE # ))) cat("\n\n") print(knitr::kable(df_missing, caption = cat)) cat("\n\n") # cat(knitr::knit_print(DT::datatable(mtcars))) cat("Number of records by day and month (with totals)") cat("\n") myTable <- addmargins(table(data.import[,ContData.env$myName.Day] ,data.import[,ContData.env$myName.Mo] , useNA = "ifany")) #table, kable (static) #print(knitr::kable(myTable, format="markdown", row.names=TRUE)) # table, DT (interactive) DT::datatable(as.data.frame.matrix(myTable) , filter = "top" , caption = "Records by day and month" , options = list(scrollX = TRUE , scrollY = TRUE , lengthMenu = c(10, 20, 32) , autoWidth = TRUE )) cat("\n\n") # # identify days/months where not the expected number of records # (expect first and last day) # print("days where not the expected number of records") # #~~~~~~~~~~~~ # New, 20220616 # OTHER # Pivot df_plot <- tidyr::pivot_wider(df_base , names_from = ContData.env$myName.Yr , values_from = "n") cat("\n\n") # # table, DT (interactive) # add color code (https://rstudio.github.io/DT/010-style.html) brks <- quantile(df_plot[, -1], probs = seq(.05, .95, .05), na.rm = TRUE) clrs <- round(seq(255, 40, length.out = length(brks) + 1), 0) %>% {paste0("rgb(255,", ., ",", ., ")")} cat <- "Records by month-day and year" print(knitr::kable(df_plot, caption = cat)) DT::datatable(df_plot , filter = "top" , caption = cat , options = list(scrollX = TRUE , scrollY = TRUE , pageLength = 30 , lengthMenu = c(15, 30, 60, 90, 120, 180, 366) , autoWidth = TRUE )) # coloring doesn't look right # %>% # formatStyle(names(df_plot)[-1] # , backgroundColor = styleInterval(brks, clrs)) # cat("\n\n") }##FOR.i.END
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.