mydata <- params$mydata_to_report
report_mydata <- params$report_mydata
station_name <- params$station_name
errors_list_critical <- params$errors_list_critical
errors_list_warning <- params$errors_list_warning
errors_list_report_errors <- params$errors_list_report_errors
dqc_date <- params$dqc_date
variables_flagged <- params$variables_flagged

Station: r station_name

Data Quality Check : r dqc_date


if(errors_list_critical$err_empty$Status == "N" &
   errors_list_critical$err_structure$Status == "N" &  
   errors_list_critical$err_overlap$Status == "N" & 
   errors_list_critical$err_no_new_data$Status == "N" ){ 

   # errors_list_critical$err_logger_number$Status == "N" &
   # errors_list_critical$err_missing_record$Status == "N" & 
   # errors_list_critical$err_restart_record$Status == "N"

  is.POSIXct <- function(x) inherits(x, "POSIXct")
  s = sapply(mydata, is.POSIXct)
  w = which(s == TRUE)

  end_date =  trunc(as.POSIXct(mydata[nrow(mydata),w], tz = "Etc/GMT-1"),units = "hours")
  dqc_date_2 = trunc(as.POSIXct(dqc_date, tz = "Etc/GMT-1"),units = "hours")

  hours_diff = as.numeric(difftime(time1 = dqc_date_2, time2 = end_date, tz = "Etc/GMT-1",units = "hours"))

  if(hours_diff > 24){
    text = "The station is currently OFFLINE! (>24h)" 
    cat("\\newpage")
    cat("\n \n")
    cat(paste("<font color='red' size = '5'>","**",text,"**","</font>",sep=""),'\n\n')
    cat('\n')
  }else{
    text = paste("The last data was downloaded at:",mydata[nrow(mydata),w])
    cat("\\newpage")
    cat("\n \n")
    cat(paste("<font color='green' size = '5'>","**",text,"**","</font>",sep=""),'\n\n')
    cat('\n')
  }

  cat("\\newpage")
  cat("\n \n")

  cat(paste("<font size = '3'>","Checked data from ","<strong><u>",mydata[1,w],"</u></strong>", " to ","<strong><u>",mydata[nrow(mydata),w],"</u></strong>","</font>",sep=""),'\n\n')

  # cat(paste("From ","**",mydata[1,w],"**", "to", "**",mydata[nrow(mydata),w],"**",sep = " " ),'\n\n')
  cat('\n')
}
# names(errors_list_warning)

# names(errors_list_critical)

# str(errors_list_critical)
# str(errors_list_warning)
# str(errors_list_report_errors)

r if(errors_list_critical$err_empty$Status == "Y"){ paste("File empty!")}

r if(errors_list_critical$err_empty$Status == "Y"){ paste("The file of station",station_name, "is empty.")}

r if(errors_list_critical$err_logger_number$Status == "Y"){ paste("Logger information changed!")}

if(errors_list_critical$err_logger_number$Status == "Y"){ 
  # datatable(errors_list_critical$err_structure$Values)
  kable(errors_list_critical$err_logger_number$Values)
}

r if(errors_list_critical$err_structure$Status == "Y"){ paste("File wrong structure!")}

if(errors_list_critical$err_structure$Status == "Y"){ 
  # DT::datatable(errors_list_critical$err_structure$Values)
  kable(errors_list_critical$err_structure$Values)
}

r if(errors_list_critical$err_structure_change$Status == "Y"){ paste("File structure changed!")}

if(errors_list_critical$err_structure_change$Status == "Y"){ 
  # DT::datatable(errors_list_critical$err_structure$Values)
  struct_change = errors_list_critical$err_structure_change$Values

  modified = struct_change[which(struct_change$Row != "Header"),]
  modified = modified[which(modified$Row != "Headers reorded"),]
  colnames(modified)[1] = "Header"

  if(nrow(modified)>0){
    print( kable(modified,row.names = F, caption = "Units or Sampling time modified:"))    
  }

  headers = struct_change[which(struct_change$Row == "Header"),]

  old_headers = headers[which(headers$Old != ""),]
  old_headers = old_headers[,-c(2,4)]
  old_headers$Column = as.numeric( old_headers$Column )
  # if(nrow(old_headers)>0){
  #   # t1 = kable(old_headers,row.names = F, caption = "Old headers removed (or modified):",booktabs = TRUE)
  #   # print( kable(old_headers,row.names = F, caption = "Old headers removed (or modified):"))
  # }

  new_headers = headers[which(headers$New != ""),]
  new_headers = new_headers[,-c(2,3)]
  new_headers$Column = as.numeric( new_headers$Column )

  # if(nrow(new_headers)>0){
  #   # t2 = kable(new_headers,row.names = F, caption = "New headers added:" ,booktabs = TRUE)
  #   # print( kable(new_headers,row.names = F, caption = "New headers added:"))
  # }

  m = merge(old_headers,new_headers,all = T)
  if(nrow(m)>0){
    v_col = as.numeric(m$Column)

    m = m[match(v_col,as.numeric(m$Column)),]
    m$Old[is.na(m$Old)] = ""
    m$New[is.na(m$New)] = ""
    m$Column = as.character(m$Column)
    print( kable(m,row.names = F, caption = "Headers removed/added:"))
  }

  reordered = struct_change[which(struct_change$Row == "Headers reorded"),]
  if(nrow(reordered) > 0){
    cat("Header reordered!")
    cat("  \n")
  }

  # # struct_change = struct_change[order(struct_change$Column),]
  # v_col = as.numeric(substring(struct_change$Column,5,nchar(struct_change$Column)))
  # v_col[order(v_col)]
  # struct_change = struct_change[v_col[order(v_col)],]
  # 
  # 
  # 
  # 
  # 
  # u = unique(struct_change$Column)
  # df_u = data.frame(matrix(ncol = 2, nrow = length(u)))
  # colnames(df_u) = c("col", "n")
  # k=1
  # for(k in 1:length(u)){
  #   df_u$col[k] = u[k]  
  #   df_u$n[k] = length(struct_change$Column[which(struct_change$Column == u[k])])  
  #   
  # }
  # 
  # struct_change_new = struct_change[ struct_change$Column %in%  df_u$col[which(df_u$n != 3)],]
  # struct_change_add_rem = struct_change[ struct_change$Column %in%  df_u$col[which(df_u$n == 3)],]
  # 
  # h_a = struct_change_add_rem[which(struct_change_add_rem$New != ""),]
  # h_added = as.data.frame(h_a$New[which(h_a$Row == "Header")])
  # colnames(h_added) = "New"
  # 
  # h_r = struct_change_add_rem[which(struct_change_add_rem$Old != ""),]
  # h_removed = as.data.frame(h_r$Old[which(h_r$Row == "Header") ])
  # colnames(h_removed) = "Old"
  # 
  # 
  # # if(any(struct_change$Row == "Header")){
  # h_change = struct_change_new[struct_change_new$Row == "Header",-which(colnames(struct_change_new)== "Row")]
  # h_modif = h_change[order(h_change$Column),]
  # 
  # v_col = as.numeric(substring(h_modif$Column,5,nchar(h_modif$Column)))
  # h_modif$Column = paste("col_",v_col+1,sep = "")
  # 
  # u_change = struct_change_new[struct_change_new$Row != "Header",]
  # u_modif = u_change[order(u_change$Column),]
  # 
  # w_col = as.numeric(substring(u_modif$Column,5,nchar(u_modif$Column)))
  # u_modif$Column = colnames(mydata)[w_col+1]
  # 
  # if(nrow(h_added)>0){
  #   # h_add =as.data.frame(h_added)
  #   # colnames(h_add) = "New"
  #   print( kable(h_added,row.names = F, caption = "New headers added:"))    
  #   # cat("  \n")
  # }
  # 
  # if(nrow(h_removed)>0){
  #   # h_rem =as.data.frame(h_removed)
  #   # colnames(h_rem) = "Old"
  #   print( kable(h_removed,row.names = F, caption = "Headers removed:"))    
  #   # cat("  \n")
  # }
  # 
  # if(nrow(h_modif)>0){
  #   h_mod =as.data.frame(h_modif[,-1])
  #   print( kable(h_mod,row.names = F, caption = "Headers modified:"))    
  #   # cat("  \n")
  #   print( kable(u_modif,row.names = F, caption = "Units or Sampling method modified:"))    
  #   # cat("  \n")
  #   
  # }

  # kable(errors_list_critical$err_structure_change$Values)
}

r if(errors_list_critical$err_no_new_data$Status == "Y"){ paste("No new data!")}

r if(errors_list_critical$err_no_new_data$Status == "Y"){paste("Date issue! Date download table (DT): ",as.character(errors_list_critical$err_no_new_data$Values[1]),". Last date in data file (LT): ",as.character(errors_list_critical$err_no_new_data$Values[2]),". Suggestion: LT should be later than DT", sep="")}

r if(errors_list_critical$err_overlap$Status == "Y"){ paste("Data overlap!")}

if(errors_list_critical$err_overlap$Status == "Y"){ 
  # df_overlap  = data.frame("overlap:",errors_list_critical$err_overlap$Values)
  # colnames(df_overlap) = c("", "Date")
  df_overlap = errors_list_critical$err_overlap$Values
  # DT::datatable(df_overlap,rownames = F)
  kable(df_overlap,rownames = F)
}

r if(errors_list_critical$err_missing_record$Status == "Y"){ paste("Data gap, no record!")}

if(errors_list_critical$err_missing_record$Status == "Y"){ 
  # DT::datatable(errors_list_critical$err_missing_record$Values)
  kable(errors_list_critical$err_missing_record$Values)
}

r if(errors_list_critical$err_restart_record$Status == "Y"){ paste("Data gap, restart record!")}

if(errors_list_critical$err_restart_record$Status == "Y"){ 
  # DT::datatable(errors_list_critical$err_restart_record$Values)
  kable(errors_list_critical$err_restart_record$Values)
}

r if(errors_list_critical$err_date_missing$Status == "Y"){ paste("Data gap!")}

if(errors_list_critical$err_date_missing$Status == "Y"){ 

  time_mydata = as.POSIXct(report_mydata[, which(colnames(report_mydata) == "TIMESTAMP")], tz = "Etc/GMT-1")

  mydata_df = report_mydata
  date_filled = ifelse(mydata_df$RECORD != -1,F,T)

  date_classification = data.frame(time_mydata, date_filled)
  colnames(date_classification) = c("TIMESTAMP","Classification")
  melt_date_classification = melt(date_classification,id.var = "TIMESTAMP")
  melt_date_classification$variable = rep(0,length(melt_date_classification$variable))
  melt_date_classification$value = as.factor(melt_date_classification$value)

  table = errors_list_critical$err_date_missing$Values

  table$From = as.POSIXct(as.character(table$From),tz = "Etc/GMT-1")-15*60/2
  table$To = as.POSIXct(as.character(table$To),tz = "Etc/GMT-1")+15*60/2

  g0 = ggplot()+
    geom_rect(data = table,aes(xmin = From, xmax = To, ymin = -Inf, ymax = Inf),alpha = 0.25)+
    geom_point(data = melt_date_classification, aes(x = TIMESTAMP, y = variable, colour = value))+
    scale_colour_manual(name = "",values = c("green", "red"))+
    ggtitle(paste("Missing dates"),subtitle = paste(paste("From:",min(melt_date_classification$TIMESTAMP), "to", max(melt_date_classification$TIMESTAMP)),
                                                    paste("Total missing dates:", sum(table$`Number of Record`,na.rm = T),"records - " , 15*sum(table$`Number of Record`,na.rm = T)/60, "hours"),sep = "\n"))+
    scale_x_datetime(date_labels = "%b %d",date_minor_breaks = "1 day") +
    scale_y_continuous(name = "")+
    theme_bw()+
    theme(axis.title.y=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())

  # variables_flagged

  knit_print(g0)

  # print(datatable(errors_list_warning$err_date_missing$Values))
  kable(errors_list_critical$err_date_missing$Values)
}

r if(length(variables_flagged) != 0){ paste("Silent Variables:")}

if(length(variables_flagged) != 0){
  df_var = as.data.frame(variables_flagged)
  # colnames(df_var) = ""
  df_var = cbind(rep("-",times = nrow(df_var)),df_var)
  colnames(df_var) = c("","")

  # kable(df_var)
  cat(hwrite(df_var, border=0, center=TRUE,  width='1000px', row.names=FALSE, row.style=list('font-weight:bold')))

}

r if(errors_list_warning$err_range_alert$Status == "Y"){ paste("Data out of range!")}

if(errors_list_warning$err_range_alert$Status == "Y"){
  table =errors_list_warning$err_range_alert$Values
  # table$Variable = as.factor(table$Variable)
  vars = unique(table$Variable)

  w = which(colnames(report_mydata) %in% vars)
  vars = colnames(report_mydata)[w]

  time_mydata = as.POSIXct(report_mydata[, which(colnames(report_mydata) == "TIMESTAMP")], tz = "Etc/GMT-1")

  for(i in 1:length(vars)){
    tab_var_v0 = table[which(table$Variable == vars[i]),]

    tab_var = tab_var_v0
    tab_var$From =  as.POSIXct(tab_var$From,tz = "Etc/GMT-1")-15*60/2
    tab_var$To =  as.POSIXct(tab_var$To,tz = "Etc/GMT-1")+15*60/2
    tab_var$Hours =  as.numeric(tab_var$Hours)

    tab_var$Error = factor(tab_var$Error,levels = c("NaN_value","Too_low","Too_high"))

    mydata_var = report_mydata[,vars[i]]
    mydata_df = data.frame(time_mydata, mydata_var)
    colnames(mydata_df) = c("TIMESTAMP",vars[i])

    melt_mydata = melt(mydata_df,id.var = "TIMESTAMP")
    melt_mydata$value = as.numeric(melt_mydata$value)


    g1 = ggplot()+
      geom_rect(data = tab_var,aes(xmin = From, xmax = To, ymin = -Inf, ymax = Inf, fill = Error),alpha = 0.3)+
      geom_line(data = melt_mydata, aes(x = TIMESTAMP, y = value))+
      scale_fill_manual(name = "",values = c("grey20","blue", "red"), labels = c("NaN_value","Too_low","Too_high"),drop = F)+
      # ggtitle(paste("Variable:", vars[i]),subtitle = paste(paste("From:",min(melt_mydata$TIMESTAMP), "to", max(melt_mydata$TIMESTAMP)),
      #                                                      paste("Total NaN:",sum(tab_var$Hours[which(tab_var$Error == "NaN_value")], na.rm = T),"hours"),   # cambiare modo sommare
      #                                                      paste("Total low data:",sum(tab_var$Hours[which(tab_var$Error == "Too_low")], na.rm = T),"hours"),
      #                                                      paste("Total high data:",sum(tab_var$Hours[which(tab_var$Error == "Too_high")], na.rm = T),"hours"),sep = "\n"))+
      ggtitle(vars[i])+
      scale_x_datetime(date_labels = "%b %d",date_minor_breaks = "1 day") +
      scale_y_continuous(name = vars[i])+
      theme_bw()+
      theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            plot.title = element_text(hjust = 0.5, face="bold",size=28))

    # d1 = data.frame(c(1,2),c(2,5))
    # colnames(d1) = c("x","y")
    # g1 = ggplot(data = d1, aes(x,y))+geom_point()+geom_line()+ggtitle(vars[i])
    print(g1)

    # print(xtable(tab_var_v0[order(tab_var_v0$From),]),type = "html")
    cat(hwrite(tab_var_v0[order(tab_var_v0$From),-1], border=0, center=TRUE,  width='1000px', row.names=FALSE, row.style=list('font-weight:bold')))
    # print(kable(tab_var_v0[order(tab_var_v0$From),],row.names = F),type = "html")
    # print( htmltools::tagList(datatable(tab_var_v0[order(tab_var_v0$From),])))
    cat (c("------------------------------------------------------------------------"),sep = "\n")
  }


  # kable(errors_list_report_errors$err_out_of_range$Values)
}


bridachristian/DataQualityCheckEuracAlpEnv documentation built on Oct. 27, 2019, 5:55 p.m.