R/covid19_anal.R
In covid19.analytics: Load and Analyze Live Data from the COVID-19 Pandemic

Documented in growth.rate mtrends report.summary single.trend tots.per.location

# Analysis fns of the covid19.analytics package
#
# M.Ponce


#######################################################################


#######################################################################

tots.per.location <- function(data, geo.loc=NULL, confBnd=FALSE, nbr.plts=1, info="") {
#' function to compute totals per location
#'
#' @param  data  data.frame with *time series* data from covid19
#' @param  geo.loc  list of locations
#' @param  confBnd  flag to activate/deactivate drawing of confidence bands base on a moving average window
#' @param  nbr.plts  parameter to control the number of plots to display per figure
#' @param  info  additional info to display in plots' titles
#'
#' @return  a list or dataframe with totals per specified locations and type of case
#'
#' @export
#'
#' @importFrom  grDevices  rainbow
#' @importFrom  graphics   barplot par plot abline axis text
#'
#'
#' @examples
#' 
#' # read data for confirmed cases
#' data <- covid19.data("ts-confirmed")
#' # compute totals per location for all the countries
#' \donttest{
#' tots.per.location(data)
#' }
#' # compute totals per location for 'Italy'
#' tots.per.location(data,geo.loc="Italy")
#' # compute totals per location for 'Italy' and 'Germany'
#' tots.per.location(data,geo.loc=c("Italy","Germany"))
#'

	# check that the data is time series
	chk.TS.data(data,xtp=TRUE)

	# first column with daily data
	col1 <- 5

	# check the location indicated
	geo.loc <- checkGeoLoc(data,geo.loc)

	# dataframe to store results
	total.cases.per.country <- data.frame()

	### preserve user graphical env.
	# save the state of par() before running the code
	oldpar <- par(no.readonly = TRUE)
	# restore the previous state after the fn is done, even if it fails, so the user environment is not altered
	on.exit(par(oldpar))
	#########
	# set some graphical parameters
	set.plt.canvas(geo.loc,nbr.plts*2)
	###
	if (length(geo.loc) > 5) par(mar=c(1,1,1,1))
	#########


	# depricated
	# if the dataset contains all the posisble cases ('confirmed','recovered','deaths')
	# process them separatedly by using recursion...
	if ("status" %in% tolower(names(data))) {
		colN <- ncol(data)
		results.lst <- list()
		for (sts in unique(data$status) ) {
			message("Processing ",sts, " cases")
			results.lst <- list(results.lst, tots.per.location(data[data$status==sts,-colN],geo.loc,confBnd,nbr.plts,sts))
		}
		return(results.lst)
	}
	#######

	#######################
	# Aggregated (new) structure
	conditions <- c("confirmed","deaths","recovered","active")
	results.lst <- list()
	Country.col <- pmatch("Country", names(data))
	Province.col <- pmatch("Province", names(data))
	for (cond in conditions) {
		if ( (cond %in% tolower(names(data))) & ("FIPS" %in% names(data)) ) {
			col.X <- pmatch(cond, tolower(names(data)))
			message("Processing ",cond, " cases")
			results.lst <- list(results.lst, tots.per.location(data[,c(Province.col,Country.col,col.X)],geo.loc,confBnd,nbr.plts,cond))
		}
		if (length(results.lst) > 0)
			return(results.lst)
	}
	#######################
	#######################


	for (i in geo.loc) {
		cases.per.loc <- select.per.loc(data,i)
		colN <- ncol(cases.per.loc)
		if (tolower("status") %in% tolower(colnames(cases.per.loc))) {
			warning("Some internal inconsistency ocurred!",'\n',"Please check with the developer")
		} else {
			#print(cases.per.loc[,col1:colN])
			totals.per.loc.day <- apply(cases.per.loc[,col1:colN],MARGIN=2,sum)
			#print(totals.per.loc.day)
			if (toupper(i) != "ALL") {
				totals.per.loc <- sum(cases.per.loc[1:nrow(cases.per.loc),colN])
			} else {
				totals.per.loc <- apply(cases.per.loc[,col1:colN],MARGIN=2,sum)
			}
			cat(i,' -- ',totals.per.loc,'\n')
			total.cases.per.country <- rbind(total.cases.per.country,c(i,totals.per.loc.day,totals.per.loc))


			##### modelling
			yvar <- totals.per.loc.day
			#print(yvar)

			## LM models
			header("="," running models...")
			model.lm <- genModel(yvar,deg=1)
			model.exp <- genModel(log1p(yvar),deg=1)
			#print(str(model2))
			## GLM models
			model.poisson <- gen.glm.model(yvar,family="poisson")
			if (sum(yvar<=0)==0) model.gamma <- gen.glm.model(yvar)

			##### plots
			Ncols <- length(cases.per.loc)
			Nrows <- nrow(cases.per.loc)
			col0 <- 5
			x.dates <- as.Date(names(cases.per.loc)[col0:Ncols])
			y.cases <- cases.per.loc[1:Nrows , col0:Ncols]
			y.cases <- unlist(yvar)
			#names(y.cases) <- x.dates
			my.cols <- rep(rainbow(15L),each=20L)
			#print(x.dates)
			#print(y.cases)
			yvarlog1p <- log1p(unlist(y.cases))
			xvar <- 1:length(yvarlog1p)
			plot(log1p(unlist(y.cases)),
				#xlim=c(1,length(yvar)),
				#ylim=c(0,max(yvar,na.rm=TRUE)),
				type='b', xlab="days", ylab='nbr.of.cases (log)', pch=16L,col=my.cols)

			## adding models to plot
			#abline((model1), col='blue')
			abline((model.exp), col='red')
			abline(model.poisson, col='blue')
			if (sum(yvar<=0)==0) abline(model.gamma, col='green')
			# models legend
			#print(.9*max(yvarlog1p+0.1))
			text(.3*Ncols,0.8*max(yvarlog1p+0.1),
				paste("exp.model coefs: ",round(model.exp$coefficients[1],digits=3),";",round(model.exp$coefficients[2],digits=3)))
			text(.3*Ncols,0.725*max(yvarlog1p+0.1),
				paste("GLM-Poisson model coefs: ",round(model.poisson$coefficients[1],digits=3),";",round(model.poisson$coefficients[2],digits=3)))
			if (sum(yvar<=0)==0) text(0.3*Ncols,0.65*max(yvarlog1p+0.1),
				paste("GLM-Gamma model coefs: ",round(model.gamma$coefficients[1],digits=3),";",round(model.gamma$coefficients[2],digits=3)))

			### add confidence band based on moving Avg
			if (confBnd)
				confBand(xvar,yvarlog1p, 1,length(yvarlog1p),0,max(yvarlog1p,na.rm=TRUE), windowsNbr=10, lcolour='violet')

			#par(new=TRUE)
			barplot(unlist(y.cases), main=paste(i,info), col = my.cols)
			#confBand(xvar,yvar, 1,length(yvar),0,max(yvar,na.rm=TRUE), windowsNbr=10)
			# models legend
			text(.2*Ncols,0.85*max(unlist(y.cases)), paste("lm-exp GR = ",round(exp(model.exp$coefficients[2]),digits=2)))
			text(.2*Ncols,0.75*max(unlist(y.cases)), paste("glm-Poisson GR = ",round(exp(model.poisson$coefficients[2]),digits=2)))
			if (sum(yvar<=0)==0) text(.2*Ncols,0.65*max(yvar), paste("glm-Gamma GR = ",round(exp(model.gamma$coefficients[2]),digits=2)))
			#par(new=FALSE)
		}
	}

	# set the name of the columns
	names(total.cases.per.country) <- names(cases.per.loc[,col1:colN-1])
	# add the location
	total.cases.per.country <- cbind(geo.loc, total.cases.per.country)

	#plot(unlist(data[ data$Country.Region==i ,5:52]))
	return(invisible(total.cases.per.country))
}


#############################################################################


growth.rate <- function(data0, geo.loc=NULL, stride=1, info="", staticPlt=TRUE, interactiveFig=FALSE, interactive.display=TRUE) {
#' function to compute daily changes and "Growth Rates" per location; "Growth Rates" defined as the ratio between changes in consecutive days
#'
#' @param  data0  data.frame with *time series* data from covid19
#' @param  geo.loc  list of locations
#' @param  stride  how frequently to compute the growth rate in units of days
#' @param  info  additional information to include in plots' title
#' @param  staticPlt  boolean flag to indicate whether static plots would be generated or not
#' @param  interactiveFig  boolean flag to indicate whether interactice figures would be generated or not
#' @param  interactive.display  boolean flag to indicate whether the interactive plot will be displayed (pushed) to your browser
#'
#' @return  a list containing two dataframes: one reporting changes on daily baisis and a second one reporting growth rates, for the indicated regions
#'
#' @importFrom  gplots  heatmap.2 bluered
#' @importFrom  pheatmap  pheatmap
#'
#' @export
#'
#' @examples
#' ###\donttest{
#' # read data for confirmed cases
#' data <- covid19.data("ts-confirmed")
#' # compute changes and growth rates per location for all the countries
#' # growth.rate(data)
#' # compute changes and growth rates per location for 'Italy'
#' growth.rate(data,geo.loc="Italy")
#' # compute changes and growth rates per location for 'Italy' and 'Germany'
#' growth.rate(data,geo.loc=c("Italy","Germany"))
#' ###}


	# check that the data is time series
	chk.TS.data(data0,xtp=TRUE)

	# define first column of data
	col1 <- 5

	cluster.type <- c("ALL","Country","City","Region","City/Region")

	# check on the location
	geo.loc <- checkGeoLoc(data0,geo.loc)

	# where to store the results
	total.changes.per.day <-data.frame() 
	total.gr.per.day <- data.frame()

	### preserve user graphical env.
	# save the state of par() before running the code
	oldpar <- par(no.readonly = TRUE)
	# restore the previous state after the fn is done, even if it fails, so the user environment is not altered
	on.exit(par(oldpar))

	# set graphical output parameters
	set.plt.canvas(geo.loc,2)
	###
	if (length(geo.loc) > 5) par(mar=c(1,1,1,1))
	#########


	if ("status" %in% names(data0)) {
		results <- list()
		for (i in unique(data0$status) ) {
			message("Considering ",i, " cases")
			data <- data0[ data0$status == i, ]
			data <- data[, ! names(data) %in% "status", drop = F]
			result.per.case <- growth.rate(data,geo.loc,stride, i, staticPlt=staticPlt, interactiveFig=interactiveFig, interactive.display=interactive.display)
			results <- list(result.per.case, results)
		}
		return(results)
	} else {
		data <- data0
	}


	for (i in geo.loc) {
		# check whether the locations are countries/regions or provinces/states
		cases.per.loc <- select.per.loc(data,i)

                colN <- ncol(cases.per.loc)
                if (tolower("status") %in% tolower(cases.per.loc))
                         colN <- colN - 1

		cat("Processing... ",i,'\n')

		totals.per.loc <- apply(cases.per.loc[,col1:colN],MARGIN=2,sum)
		#print(totals.per.loc)

		# determine period of time and ranges...
		nbr.of.days <- length(totals.per.loc)
		range1 <- seq(1,nbr.of.days-1,stride)
		range2 <- range1+1

		# compute changes...
		changes <- totals.per.loc[range2]-totals.per.loc[range1]
		#print(changes)

		# compute growth rate
		gr.rate <- changes[range2]/changes[range1]
		# remove infinites in case of divison by 0 or something tiny...
		gr.rate[gr.rate == "Inf"] <- NA
		gr.rate[gr.rate == "-Inf"] <- NA
		#print(gr.rate)

		# update resulting dataframe
		total.changes.per.day <- rbind(total.changes.per.day, changes)
		total.gr.per.day <- rbind(total.gr.per.day, gr.rate)

		if (staticPlt) {
		# some graphic output...		
		my.cols <- rep(rainbow(15L),each=20L)
		x.dates <- as.Date(names(totals.per.loc[2:length(totals.per.loc)]))
		plot(x.dates,changes, type='b', xlab="time",ylab="Nbr of Changes", col=my.cols)
		#lines(x.dates,exp(model2$coefficients[2]*seq_along(x.dates))*model2$coefficients[1], col='blue')
		par(new=TRUE)
		plot(x.dates,log1p(changes), ylab='',xlab='', main=paste(i,info), type='b', pch=8, cex=.3, col=my.cols, lwd=2, lty=1, axes=FALSE)
		axis(4)
		par(new=FALSE)
		# check to see if there is enough data to display gr.rate
		if (sum(changes==0) < length(changes)-1) {
			# mask for avoiding blowing-up the plots and set the yMaximum value
			mask <- (!is.infinite(gr.rate) & !is.nan(gr.rate) & !is.na(gr.rate))
			yMax <- max(gr.rate[mask],na.rm=TRUE)
			plot(x.dates,unlist(gr.rate),
				axes=FALSE, xlab='',ylab='', ylim=c(0,yMax*1.05),	#max(gr.rate,na.rm=TRUE)*1.05),
				main=paste(i,info), type='b', col=my.cols)
			par(new=TRUE)
			barplot(unlist(gr.rate), ylab="Growth Rate",xlab="time",col = my.cols, ylim=c(0,yMax*1.05))
			axis(side=1,labels=FALSE)
			#axis.Date(side=1,x.dates)
			#box()
			par(new=FALSE)
		} else {
			message("Not enough data to compute growth rate for ",i)
		}
		}
	}

	if ( (nrow(total.changes.per.day)>=1) && (length(geo.loc)>=1) )  {

		names(total.changes.per.day) <- names(changes)
		total.changes.per.day <- cbind(geo.loc, total.changes.per.day)

		names(total.gr.per.day) <- names(gr.rate)
		total.gr.per.day <- cbind(geo.loc, total.gr.per.day)

	if (staticPlt) {
		# load some graphical libraries 
                loadLibrary("pheatmap")
                loadLibrary("gplots")

		par(mfrow=c(1,2))
		if (nrow(total.changes.per.day) > 1) {
			mat.tgt <- (as.matrix(total.changes.per.day[,2:length(changes)]))
			# deal with NA/nan/... setting to 0
			mat.tgt[is.na(mat.tgt)] <- 0

			# Heatmap.2
			heatmap.2(mat.tgt,
				dendrogram="none", trace='none',
				col=bluered, labRow=geo.loc,
				main=paste("Changes per day",info) )

			# pheatmap
			pheatmap(mat.tgt,
				cluster_rows = FALSE, cluster_cols = FALSE,
				scale = 'none',
				annotation_legend = FALSE,
				drop_levels       = TRUE,
				fontsize          = 8,
				labels_col = "",
				labels_row = geo.loc,
				display_numbers = FALSE)

			# R basic heatmap
			#heatmap(mat.tgt,
			#	main=paste("Changes per day",info),
			#	labRow=geo.loc, Colv=NA,Rowv=NA,
			#	scale="column",
			#	col = topo.colors(256) )
		}

		if (nrow(total.gr.per.day) > 1) {
			mat.tgt <- (as.matrix(total.gr.per.day[,2:length(gr.rate)]))
			#mat.tgt <-  mat.tgt[,which(unlist(lapply(mat.tgt, function(x) !all(is.na(x)))))]
			#mat.tgt <-  mat.tgt[,which(unlist(lapply(mat.tgt, function(x) !all(is.nan(x)))))]
			# deal with NA/nan/... setting to 0
			mat.tgt[is.na(mat.tgt)] <- 0
			mat.tgt[is.nan(mat.tgt)] <- 0

			heatmap.2(mat.tgt,
				dendrogram="none", trace='none',
				col=bluered, labRow=geo.loc,
				main=paste("Growth Rate",info) )

			# pheatmap
			pheatmap(mat.tgt,
				cluster_rows = FALSE, cluster_cols = FALSE,
				display_numbers=FALSE,
				labels_col = "",
				labels_row = geo.loc)

			# R basic heatmap
			#heatmap(mat.tgt,
			#	main=paste("Growth Rate",info),
			#	labRow=geo.loc, Colv=NA,Rowv=NA,
			#	scale="column",
			#	col = rainbow(256) )
		}
	}
}
	if (interactiveFig) {
		loadLibrary("plotly")
		p1 <- plot_ly(z=t(total.changes.per.day),x=total.changes.per.day['geo.loc'][[1]],y=names(total.changes.per.day), type='surface', scene='scene1')
		p2 <- plot_ly(z=t(total.changes.per.day),x=total.changes.per.day['geo.loc'][[1]],y=names(total.changes.per.day), type='heatmap')
		p3 <- plot_ly(z=t(total.gr.per.day), x=total.gr.per.day['geo.loc'][[1]], y=names(total.changes.per.day)[-1], type='heatmap')
		#p3 <- layout(p3, scene = list(zaxis = list(title = "Growth Rate", range = c(-4,4)), yaxis = list(title = "B"), xaxis = list(title = "C")))
		if (interactive.display) {
					print(p1)
					print(p2)
					print(p3)
		} else {
			# compose one plot

			ifig <- subplot(p2, p1, p3)#, widths=c(0.3,0.4,0,.3), ncols=3)
			ifig <- ifig %>% layout(title = "Daily Changes & Growth Rate",
         			scene1 = list(domain=list(x=c(0,0.5),y=c(0.75,1)),
                      				aspectmode='cube')
				#scene2 = list(domain=list(x=c(0,0.5),y=c(0,0.5)),
 		                #     aspectmode='data'),
				#scene3 = list(domain=list(x=c(0.5,1),y=c(0,0.5)),
				#	aspectmode='data')
					)

			#ifig <- subplot(list(subplot(p1,p2),p3), nrows = 3)#, shareX = FALSE, titleX = TRUE)
			return(ifig)
		}
	}
	return(list(Changes=total.changes.per.day,Growth.Rate=total.gr.per.day))
}


#############################################################################
#############################################################################
#### AUXILIARY FUNCTIONS FOR REPORTS

        ######################

        intro.header <- function(i, data,colN,country.col,province.col, geo.loc=NULL, scr.len=80) {

                header("#", total.len=scr.len)
                report.title <- paste(toupper(i),"Cases  -- Data dated: ",names(data)[colN]," :: ",as.character(Sys.time()))
                header('',paste0("##### ",report.title))
                header("#", total.len=scr.len)

                country.col <- pmatch("Country", names(data))
                province.col <- pmatch("Province", names(data))
                header('',paste("Number of Countries/Regions reported: ",length(unique(data[,country.col]))) )
                header('',paste("Number of Cities/Provinces reported: ",length(unique(data[,province.col]))) )
                header('',paste0("Unique number of distinct geographical locations combined: ",nrow(unique(data[,c(country.col,province.col)]))))
                header("-", total.len=scr.len)

		if (length(colN)==1) {
			if (is.null(geo.loc)) {
				locn <-"Worldwide" 
			} else {
				locn <- "For selected locations"
			}
			header('',paste(locn,i," Totals:", sum(data[,colN],na.rm=TRUE)))
			#header('',paste(Nentries," Top Totals:", sum(data[,colN],na.rm=TRUE)))
			header('-', total.len=scr.len)
		} else {
			for (j in colN) {
				header('',paste("Worldwide ",i," Totals:", sum(data[,j],na.rm=TRUE)))
			}
			#header('-', total.len=scr.len)
		}

                return(invisible(report.title))
        }

        ######################

        report.Avgs <- function(data,target="Perc",descr="") {

                Perc.col <- which(grepl(target,names(data)))
                if (length(Perc.col)>0) {
                        #print(data[,Perc.col])
			tgt.col <- data[,Perc.col]
			tgt.col <- tgt.col[!is.infinite(tgt.col)]
			#print(tgt.col)
                        mean.value <- round(mean(tgt.col,na.rm=TRUE),2)
                        sd.value <- round(sd(tgt.col,na.rm=TRUE),2)
                        if (!is.infinite(mean.value) & !is.na(mean.value))
                                header( "", paste(descr,mean.value, paste0("(sd: ",sd.value,")")) )
                }

        }

        #####################

	report.Totals <- function(cases,totals,preTitle="",totEntries=NA) {
		header('',eol='\n\n')
		header('*')
		header('*',"OVERALL SUMMARY")
		header('*')
		#print.Title <- TRUE
		for (j in names(totals)) {
			values <- totals[[j]]
			#if (print.Title) {
				header("", paste("**** ",preTitle,toupper(j),"****"))
			#	print.Title <- TRUE
			#}
			header('',paste("\t",paste(cases,collapse="\t")))
			header('',paste("\t",paste(round(values,2),collapse="\t")))
			header('',paste0("\t\t\t",paste(round(values[2:3]/(values[1])*100,2),collapse="% \t\t"),"%"))
                        ##header('',paste0("\t",paste(round(values/(totals[["tots"]][1])*100,2),collapse="% \t"),"%"))
			#header('-')
		}

		if (length(totEntries)>1 || !is.na(totEntries))
			header('',paste('\n\n',"* Statistical estimators computed considering",
					paste(totEntries,collapse="/"),
					"independent reported entries per case-type"))

		header('*')
	}

#############################################################################
#############################################################################
process.agg.cases <- function(data, Nentries, geo.loc=NULL, graphical.output) {

	#######

	comb.cols <- function(x,y) {

		#return(c(x[1:2],y[1],x[3],y[2],x[4],y[3]))
		return(c(x[1],y[1], x[2],y[2], x[3],y[3], x[4],y[4]))
	}

	#######

	# set the length in chars for the output
	scr.len <- 140

	if (graphical.output)
		par(mfrow=c(4,2))


	# clean some spurious data
	#data <- na.omit(data[,-1])

	# define some cols and values...
	colN <- ncol(data)
	nRecords <- nrow(data)

	col.names <- names(data)
	country.col <- pmatch("Country", col.names)
	province.col <- pmatch("Province", col.names)
	date.col <- pmatch("Last_Update", col.names)
	combKey.col <- pmatch("Combined",col.names)

	col.conf <- pmatch("Confirmed",col.names)
	col.death <- pmatch("Deaths",col.names)
	col.recov <- pmatch("Recovered",col.names)
	col.activ <- pmatch("Active",col.names)
	col.cases <- c(col.conf,col.death,col.recov,col.activ)

	# if Nentries i set to 0, will consider *all* entries                
	if (Nentries==0) Nentries <- nRecords

	data0 <- data

	# allowing to report by selected location
	if (!is.null(geo.loc)) {
		# check the location indicated
		geo.loc <- checkGeoLoc(data,geo.loc)
		data <- select.per.loc(data,geo.loc)
		Nentries <- min(Nentries,nrow(data))
	}
	###

	cases <- c("Confirmed","Deaths","Recovered","Active")
	for (i in col.cases) {

		header("#", total.len=scr.len)
		report.title <- paste("AGGREGATED Data  -- ORDERED BY ",toupper(col.names[i]),"Cases  -- Data dated: ",
					as.character(max(as.Date(data[,date.col]))) ,
					" :: ",as.character(Sys.time()) )
		header('',paste0("##### ",report.title))
		header("#", total.len=scr.len)

		header('',paste0("Number of Countries/Regions reported: ",length(unique(data[,country.col]))))
		header('',paste0("Number of Cities/Provinces reported: ",length(unique(data[,province.col]))))
		header('',paste0("Unique number of distinct geographical locations combined: ",
				length(unique(unlist(data[,combKey.col])))
				#nrow(unique(data[,c(country.col,province.col)]))
				) )
		header("-", total.len=scr.len)

		# display 'header'
		#report.title <- intro.header(i, data,col.cases[i],
		#				country.col,province.col, scr.len=scr.len)


		# get percentage cases
		cols.perc <- c()
		for (j in col.cases[1:4]) {
			if (j != col.conf) {
				data[,paste0("Perc.",col.names[j])]  <- round((data[,j]/data[,col.conf])*100,2)
			} else {
				# Global Perc.
				data[,paste0("Perc.",col.names[j])]  <- round((data[,j]/sum(data0[,col.conf]))*100,2)
				# Relative Perc.
				#data[,paste0("Rel.Perc.",col.names[j])]  <- round((data[,j]/sum(data[,col.conf]))*100,2)
			}
			cols.perc <- c(cols.perc,ncol(data))
		}

		# top countries/regions
		#data.ordered <- data[order(data[,i],decreasing=TRUE),][1:Nentries,c(country.col,province.col, col.cases, cols.perc)]
		custom.list <- comb.cols(col.cases,cols.perc)
		#print(custom.list)

		# Select which columns to keep from the original dataframe
		original.cols <- c(combKey.col)
		upto.record <- min(nrow(data),Nentries)
		#original.cols <- c(country.col,province.col,combKey.col)
		data.ordered <- data[order(data[,i],decreasing=TRUE),][1:upto.record,c(original.cols, custom.list)]
		#names(data.ordered) <- c("Country.Region","Province.State", cases)
		names(data.ordered)[1] <- "Location"
		row.names(data.ordered) <- 1:upto.record

                print(data.ordered)

		header("=", total.len=scr.len)

		target.col <- col.names[i]
		ord.cty.col <- pmatch("Country",names(data.ordered))
		ord.prv.col <- pmatch("Province",names(data.ordered))
		##ord.combKey.col <- pmatch("Combined",names(data.ordered))
		ord.combKey.col <- which("Location"==names(data.ordered))

#print("#!!!")
#filter.data <- na.omit(data.ordered[data.ordered[,target.col]>0,target.col])
#print(filter.data)
#print("#!!!")

                # graphics
                if (graphical.output) {
			#legends <- paste(as.character(data.ordered[,ord.cty.col]),as.character(data.ordered[,ord.prv.col]),'\n',data.ordered[,target.col])
			legends <- paste(as.character(data.ordered[,ord.combKey.col]),'\n',data.ordered[,target.col])
                        color.scheme <- heat.colors(Nentries)   #topo.colors(Nentries)
                                        #terrain.colors(Nentries)       #rainbow(Nentries)

			filter.data <- na.omit(data.ordered[data.ordered[,target.col]>0,target.col])

                        #pie(na.omit(data.ordered[,target.col]),
			pie(filter.data,
                                labels=legends,
                                main=substr(report.title,1,floor(nchar(report.title)/2)),
                                col=color.scheme )
                        #par(new=TRUE)
                        #par(mfrow=c(1,2))
                        barplot(data.ordered[,target.col], names.arg=legends,
                                col=color.scheme,
                                main=substr(report.title,ceiling(nchar(report.title)/2),nchar(report.title)) )
                }

		# remove perc cols
		data <- data[,-cols.perc]
	}

	## REPORT TOTALS
	if (TRUE) {
		#print(names(data)[col.cases])
		header('',paste("\t",paste(names(data)[col.cases],collapse='\t')))
		header('',"Totals",eol='')
		header('',paste("\t",paste(sapply(data[,col.cases],sum),collapse='\t')))
		header('',"Average",eol='')
		header('',paste("\t",paste(round(sapply(data[,col.cases],mean),2),collapse='\t')))
		header('',"Standard Deviation",eol='')
		header('',paste("\t",paste(round(sapply(data[,col.cases],sd),2),collapse="\t")))

		header('',paste('\n\n',"* Statistical estimators computed considering",nrow(data),"independent reported entries"))
	}
}


#########################################################################


report.summary <- function(cases.to.process="ALL", Nentries=10, geo.loc=NULL,
				graphical.output=TRUE, saveReport=FALSE) {
#' function to summarize the current situation, will download the latest data and summarize the top provinces/cities per case 
#'
#' @param  cases.to.process  which data to process: "TS" --time series--, "AGG" --aggregated-- or "ALL" --time series and aggregated--
#' @param  Nentries  number of top cases to display
#' @param  geo.loc  geographical location to process
#' @param  graphical.output  flag to deactivate graphical output
#' @param  saveReport  flag to indicate whether the report should be saved in a file
#'
#' @importFrom  stats  sd
#' @importFrom  grDevices  heat.colors
#' @importFrom  graphics  pie
#'
#' @export
#'
#' @examples
#' # triggers CRAN checks for timing
#' \dontrun{
#' # displaying top 10s
#' report.summary()
#'
#' # get the top 20
#' report.summary(Nentries=20,graphical.output=FALSE)
#'
#' # specify a location
#' report.summary(geo.loc="NorthAmerica")
#' }
#'

	# reassign Nentries to Nentriex
	Nentriex <- Nentries
	Ndefault <- 10

	if (!is.numeric(Nentriex)) {
		stop("The argument to this function must be a number!")
	} else if (Nentriex < 0) {
		message("Argument must be positive or zero (for listing all entries)!",'\n',
			"Will assume default value:",Ndefault)
		Nentriex <- Ndefault
	}


	# preserve user options
	old.opts <- options("width")
	options(width=200)

	# preserve user graphical environment
	if (graphical.output) {
		old.par <- par(no.readonly=TRUE)
		on.exit(par(old.par))
	}

	# save report to a file
	if (saveReport) {
			fileName <- paste0("covid19-SummaryReport_",Sys.Date(),".txt")
			# sink the output to the file and console (split=T) and include messages and warnings too
			sink(file=fileName,split=TRUE) #,type = "message")
			#file.out <- file(fileName, open = "wt")
			#sink(file.out, type = "message")
			#sink(file.out, split=TRUE)
	}

	##### PROCESS TIME SERIES DATA ######
	if ( (toupper(cases.to.process)=="ALL") | (toupper(cases.to.process)=="TS") ) {
	# first column with cases data
	col1 <- 5

	#cases <- c("confirmed","recovered","deaths")
	cases <- c("ts-confirmed","ts-deaths","ts-recovered")	#, "aggregated")

	TS.totals <- list(tots=c(),avgs=c(),sds=c())
	n0 <- c()
	for (i in cases) {
		# read data
		data <- covid19.data(i)

		# run integrity and consistency checks...
		data.checks(data,datasetName=i,details=FALSE,disclose=FALSE)

		# >> nullify data
		#data <- nullify.data(data, stringent=TRUE)

		# if Nentries is set to 0, will consider *all* entries
		if (Nentries==0) Nentriex <- nrow(data)

                colN <- ncol(data)
                nRecords <- nrow(data)
#               if (grepl("aggregated",i))      col.Tgt <-

                # get total Global for confirmed cases
		total.global <- sum(data[,colN],na.rm=TRUE)

		# reporting only for geo.loc indicated
		if (!is.null(geo.loc)) {
			# check the location indicated
			geo.loc <- checkGeoLoc(data,geo.loc)
			# selecting data and updating entries
			data <- select.per.loc(data,geo.loc)
			Nentriex <- min(Nentriex,nrow(data))
		}
		###

		colN <- ncol(data)
		nRecords <- nrow(data)

                # get totals per case
                TS.totals$tots <- c(TS.totals$tots, sum(data[,colN],na.rm=TRUE))
                TS.totals$avgs <- c(TS.totals$avgs, mean(data[,colN],na.rm=TRUE))
                TS.totals$sds <-  c(TS.totals$sds , sd(data[,colN],na.rm=TRUE))
                ###

		n0 <- c(n0,nrow(data))

		# indentify country and province columns
		country.col <- pmatch("Country", names(data))
		province.col <- pmatch("Province", names(data))

		# display 'header'
		report.title <- intro.header(i, data,colN,country.col,province.col, geo.loc)

		# Totals per countries/cities
		#data$Totals <- apply(data[,col1:colN],MARGIN=1,sum)
		data$Totals <- data[,colN]

		# store total nbr of cases
		if (grepl("confirmed",i)) {
			geo.list <- data[,c(country.col,province.col)]
                        total.cases <- data[,colN]
			colsF <- (colN+1):(colN+8)
			total.rel <- sum(total.cases)
			if (!is.null(geo.loc)) {
				data$RelPerc <- round((total.cases/total.rel)*100,2)
				colsF <- (colN+1):(colN+9)
			}
			data$GlobalPerc <- round((total.cases/total.global)*100,2)
		} else {
			# check that the countries/regions match in order to compute percentages...
			# Could improve partial matches...
			if ( sum(as.character(data[,country.col])==as.character(geo.list[1:nRecords,1]))==nRecords
				&&  sum(as.character(data[,province.col])==as.character(geo.list[1:nRecords,2]))==nRecords ) {
				# get percentage cases
				data$Perc  <- round((data[,colN]/total.cases[1:nRecords])*100,2)
				colsF <- (colN+1):(colN+8)
			} else {
				colsF <- (colN+1):(colN+7)
			}
		}
		# last day change for all cases
		data$"LastDayChange" <- data[,colN]-data[,colN-1]
		data$"t-2" <- data[,colN-1]-data[,colN-2]
		#data$"T-2" <- data[,colN]-data[,colN-2]
		data$"t-3" <- data[,colN-2]-data[,colN-3]
		data$"t-7" <- data[,colN-6]-data[,colN-7]
		data$"t-14" <- data[,colN-13]-data[,colN-14]
		data$"t-30" <- data[,colN-29]-data[,colN-30]

		# top countries/regions
		upto.record <- min(nrow(data),Nentriex)
		data.ordered <- data[order(data$Totals,decreasing=TRUE, na.last=NA),][1:upto.record,c(country.col,province.col,colsF)]
		row.names(data.ordered) <- 1:upto.record
		names(data.ordered)[1:3] <- c("Country.Region","Province.State","Totals")
	
		print(data.ordered)

		# Report Average Percentages...
		header('-')
		report.Avgs(data,"GlobalPerc",descr="Global Perc. Average: ")
		report.Avgs(data.ordered,"GlobalPerc",descr=paste("Global Perc. Average in top ",Nentriex,": "))
		header('-')

		header("=")

		# graphics
		if (graphical.output) {
			# check if there are any records with positive values
			if (sum(data.ordered$Totals>0) > 0) {
			legends <- paste(data.ordered$Country.Region,data.ordered$Province.State,'\n',data.ordered$Totals)
			color.scheme <- heat.colors(Nentriex)	#topo.colors(Nentriex)
					#terrain.colors(Nentriex)	#rainbow(Nentriex)

			par(mfrow=c(1,2))
			#par(mfrow=c(1,1))
			pie(na.omit(data.ordered$Totals[data.ordered$Totals>0]),
				labels=legends,
				main=substr(report.title,1,floor(nchar(report.title)/2)),
				col=color.scheme )

			#par(new=TRUE)
			#par(mfrow=c(1,2))
			barplot(data.ordered$Totals, names.arg=legends,
				col=color.scheme,
				main=substr(report.title,ceiling(nchar(report.title)/2),nchar(report.title)) )
			} else {
				warning("Graphical Output: Data yields non-positive results!")
			}
		}
	}

	}

	##### PROCESS "AGREGATED" DATA  #######
	if ( (toupper(cases.to.process)=="ALL") | (toupper(cases.to.process)=="AGG") ) {
		agg.data <- covid19.data("aggregated")

		# >> nullify data
		agg.data <- nullify.data(agg.data)
		process.agg.cases(agg.data, Nentries, geo.loc=geo.loc, graphical.output)

		# report integrity and consistency checks in the data
		integrity.check(agg.data,recommend=FALSE)
	}


	#### OVERALL SUMMARY
	if (!is.null(geo.loc)) {
		report.title <- paste("Time Series ",paste(geo.loc,collapse=','))
	} else {
		report.title <- "Time Series Worldwide"
	}
	if ( (toupper(cases.to.process)=="ALL") | (toupper(cases.to.process)=="TS") ) {
	        report.Totals(cases,TS.totals, preTitle=report.title,n0)	#nrow(data))
	}

	if (saveReport) {
		sink()
		message("Report saved in ",fileName)
	}

	# reset options to user ones
	options(old.opts)

	#return(TS.totals)
}

#############################################################################


single.trend <- function(ts.data, confBnd=TRUE, info="") {
#' function to visualize different indicators for trends in daily changes of cases reported as time series data
#' 
#' @param  ts.data  time series data
#' @param  confBnd  optional argument to remove the drawing of a confidence band
#' @param  info  addtional information to display in plots
#'
#'
#' @importFrom  graphics  axis.Date lines mtext
#'
#' @export
#'
#' @examples
#' \donttest{
#' tor.data <- covid19.Toronto.data()
#' single.trend(tor.data[tor.data$status=="Active Cases",]) 
#' }
#'
#' ts.data <- covid19.data("ts-confirmed")
#' ont.data <- ts.data[ ts.data$Province.State == "Ontario",]
#' single.trend(ont.data)
#'
#' single.trend(ts.data[ ts.data$Country.Region=="Italy",])


        # check that the data is time series
        chk.TS.data(ts.data,xtp=TRUE)

        # first column with daily data
        col1 <- 5
	Ncols <- ncol(ts.data)

	cty.col <- pmatch("Country",names(ts.data))
	prv.col <- pmatch("Province",names(ts.data))
	lat.col <- pmatch("Lat",names(ts.data))
	lng.col <- pmatch("Long",names(ts.data))

	cols.range <- col1:Ncols

	if (nrow(ts.data)>1) {
		#ts.data <- cbind(unique(as.character(ts.data[,c(cty.col)])),
		#		paste(ts.data[,prv.col],collapse="/"),
		#		rbind(apply(ts.data[,c(lat.col,lng.col)],MARGIN=2,mean)),
		#		rbind(apply(ts.data[,col1:Ncols],MARGIN=2,sum)) )

		#ts.data[,5:Ncols] <- as.numeric(ts.data[,5:Ncols])
		ts.data <- apply(ts.data[,col1:Ncols],MARGIN=2,sum)
		cols.range <- 1:length(ts.data)
	}

	xvar <- as.Date(names(ts.data)[cols.range])
	xvar.diff <- xvar[-length(xvar)]
	yvar  <- as.numeric(ts.data[cols.range])
	yvar.diff <- diff(yvar,lag=1)


	# preserve user graphical environment
	old.par <- par(no.readonly=TRUE)
	on.exit(par(old.par))


	### MAIN PLOT
	# plot diff
	plot( xvar.diff,yvar.diff, type='b', cex=0.5,
		#ylim=c(min(yvar.diff),1.15*max(yvar.diff)),
		axes=FALSE, xlab="Time (dates)", ylab="Daily Changes" )
	mtext(info,side=3,padj=0,adj=0)
	axis.Date(1,xvar.diff)
	axis(side=2)

	if (confBnd)
		confBand(xvar.diff,yvar.diff, 1,length(yvar.diff),0,max(yvar.diff,na.rm=TRUE), windowsNbr=10, lcolour='black',lwidth=0.75)


	### SUBPLOTS
	par(new=TRUE)
	par(mfrow=c(4,4), mai = c(0.3, 0.1, 0.1, 0.3))
	par(mfg=c(1,2))

	# plot cumulative nbr of cases
	plot(xvar,yvar, type='l', col='darkblue', xlab='',ylab='Nbr of Cases')
#	if (confBnd)
#		confBand(xvar,yvar, 1,length(yvar),0,max(yvar,na.rm=TRUE), windowsNbr=10, lcolour='black', lwidth=0.5)
	par(new=TRUE)
	plot(xvar,log1p(yvar), type='l', col='blue', lty='dotdash', axes=F, xlab='',ylab='')
	axis(4)

	# plot diff vs total nbr of cases
	par(new=FALSE)
	par(mfg=c(1,3))
	plot(yvar.diff ~ yvar[-length(yvar)], type='l', lwd=0.35,  axes=F, xlab='',ylab='')
	axis(1); axis(4)
	if (confBnd)
		confBand(yvar[-length(yvar)],yvar.diff, 1,length(yvar.diff),0,max(yvar.diff,na.rm=TRUE), windowsNbr=10, lcolour='black',lwidth=0.75)

	### 
	plot( log1p(yvar.diff) ~ log1p(yvar[-length(yvar)]),
			type='p', cex=0.35, pch=20, lwd=.25,  axes=F, xlab='',ylab='',
			xlim=c(0,max(log1p(yvar),log1p(yvar.diff), na.rm=TRUE)) ,
			ylim=c(0,max(log1p(yvar),log1p(yvar.diff), na.rm=TRUE))	)
	lines(log1p(yvar.diff) ~ log1p(yvar[-length(yvar)]), lwd=.25)
	confBand(log1p(yvar[-length(yvar)]),log1p(yvar.diff), .1,length(yvar.diff),0,max(log1p(yvar.diff),na.rm=TRUE), windowsNbr=10, lcolour='black',lwidth=.75)
	abline(0,1, lty=4)
	axis(side=1); axis(side=4)

	# GROWTH RATE
	par(mfg=c(2,2))
	gr.rate <- (yvar.diff[2:length(yvar.diff)]/yvar.diff[1:(length(yvar.diff)-1)])
	#plot(xvar.diff,gr.rate, type='l', ylab="Growth rate", lwd=0.75)
	#confBand(xvar.diff,gr.rate, .5,length(gr.rate),0,max(gr.rate,na.rm=TRUE), windowsNbr=10, lcolour='black')
	par(mfrow=c(6,4), mai=c(0,0,0,0))
	par(mfg=c(3,2))
	mask.data <- which(!is.nan(gr.rate) & !is.infinite(gr.rate) & gr.rate>0)
	gr.rate <- gr.rate[mask.data]

	# will draw plots if there is enough data points...
	if (length(gr.rate)>1) {
		barplot(gr.rate)
		#axis.Date(1,xvar.diff)

	# Normalized growth rate
		par(mfrow=c(15,4), mai=c(0,0,0,0))
		par(mfg=c(5,2))
		norm.gr.rate <- gr.rate/max(gr.rate)

		norm.gr.rate <- (norm.gr.rate)
		plot(xvar.diff[mask.data],norm.gr.rate, axes=FALSE, type='s', lwd=.3)
		#axis.Date(1,xvar.diff[mask.data])
		axis(4)
		confBand(xvar.diff[mask.data],norm.gr.rate, .25,length(norm.gr.rate),0,max(norm.gr.rate,na.rm=TRUE), windowsNbr=15, lcolour='black',lwidth=0.75)
	}
}

#############################################################################

mtrends <- function(data, geo.loc=NULL, confBnd=TRUE, info="") {
#' function to visualize different indicators for trends in daily changes of cases reported as time series data, for mutliple (or single) locations
#'
#' @param  data  data.frame with *time series* data from covid19
#' @param  geo.loc  list of locations
#' @param  confBnd  flag to activate/deactivate drawing of confidence bands base on a moving average window
#' @param  info  additional info to display in the plot
#'
#' @export
#'
#' @importFrom  graphics   barplot par plot abline axis 
#'
#' @examples
#' # triggers CRAN checks for timing
#' \dontrun{
#' ts.data <- covid19.data("ts-confirmed")
#' mtrends(ts.data, geo.loc=c("Canada","Ontario","Uruguay","Italy"))
#' }

        # check that the data is time series
        chk.TS.data(data,xtp=TRUE)

        # check the location indicated
        geo.loc <- checkGeoLoc(data,geo.loc)

	results <- list()

	# Process each case if status is present...
        if ("status" %in% names(data)) {
                for (i in unique(data$status) ) {
                        message("Considering ",i, " cases")
                        datai <- data[ data$status == i, ]
                        datai <- datai[, ! names(data) %in% "status", drop = F]
                        result.per.case <- mtrends(datai,geo.loc,confBnd, i)
                        results <- list(result.per.case, results)
                }
                return(results)
        }

        for (i in geo.loc) {
                # check whether the locations are countries/regions or provinces/states
                cases.per.loc <- select.per.loc(data,i)

		Country.col <- pmatch("Country", names(data))
		Province.col <- pmatch("Province", names(data))

		colN <- ncol(cases.per.loc)
		if (tolower("status") %in% tolower(cases.per.loc))
			colN <- colN - 1

		for (j in 1:nrow(cases.per.loc)) {
			locn <- paste(cases.per.loc[j,Country.col],cases.per.loc[j,Province.col],collapse=" ")
			header('',paste("Processing ",locn,"..."))
			single.trend(cases.per.loc[j,],confBnd,paste(locn,info))
		}
	}
}

#############################################################################
Any scripts or data that you put into this service are public.
covid19.analytics documentation built on Oct. 16, 2023, 1:06 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
covid19.analytics
Load and Analyze Live Data from the COVID-19 Pandemic

R/covid19_anal.R
In covid19.analytics: Load and Analyze Live Data from the COVID-19 Pandemic

Defines functions mtrends single.trend report.summary process.agg.cases report.Totals report.Avgs intro.header growth.rate tots.per.location

Documented in growth.rate mtrends report.summary single.trend tots.per.location

Try the covid19.analytics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

covid19.analytics Load and Analyze Live Data from the COVID-19 Pandemic

R/covid19_anal.R In covid19.analytics: Load and Analyze Live Data from the COVID-19 Pandemic

Defines functions mtrends single.trend report.summary process.agg.cases report.Totals report.Avgs intro.header growth.rate tots.per.location

Documented in growth.rate mtrends report.summary single.trend tots.per.location

Try the covid19.analytics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

covid19.analytics
Load and Analyze Live Data from the COVID-19 Pandemic

R/covid19_anal.R
In covid19.analytics: Load and Analyze Live Data from the COVID-19 Pandemic