R/covid19_models.R

Defines functions sweep.SIR.models plt.SIR.model generate.SIR.model simple.SIR.ODE evalModel genModel

Documented in generate.SIR.model genModel plt.SIR.model simple.SIR.ODE sweep.SIR.models

# Modelling fns part of the covid19.analytics package
#
# M.Ponce


#######################################################################

genModel <- function(y, deg=1) {
#' function to generate models using Linear Regression "LM"
#'
#' @param  y  vector containing the data to fit
#' @param  deg  degree of the polynomial fit
#' 
#' @keywords internal
#'
#' @importFrom  stats  lm
#'

	y.var <- unlist(y)
	x.var <- 1:length(y.var)
	#print(x.var)
	#print(y.var)

	model <- lm(y.var ~ x.var)

	#header('-')
	header('',"Linear Regression (lm):")
	print(summary(model))
	header('-')

	return(model)	
}

evalModel <- function(model) {

}


#######################################################################

gen.glm.model <- function (y, family=Gamma(link="log")) {
#' function to generate models using GLM
#'
#' @param  y  vector containing the data to fit
#' @param  family  family to use in the GLM method
#'
#' @keywords internal
#'
#' @importFrom  stats  glm  Gamma poisson
#' @importFrom  utils  capture.output
#'

	y.var <- unlist(y)
	x.var <- 1:length(y.var)
	#print(x.var)
	#print(y.var)

	model <- glm(y.var ~ x.var, family=family)

	#header('-')
	header('',paste0("GLM using Family ",
		paste(capture.output(eval(family)),collapse="")," :") )
	print(summary(model))
	header('-')

	return(model)
}

#######################################################################
#######################################################################
##### UNDER ACTIVE DEVELOPMENT #####
#######################################################################
#######################################################################

simple.SIR.ODE <- function(time, state, parameters) {
#' Define an ODE system for a simple SIR model
#'
#' @param  time  time variable
#' @param  state  state variable
#' @param  parameters parameters of the ODE
#'
#' @keywords internal
#'

  ODE.params <- as.list(c(state, parameters))

  # define ODE
  with(ODE.params, {
    dSdt <- -beta/N * I * S
    dIdt <- beta/N * I * S - gamma * I
    dRdt <- gamma * I
    list(c(dSdt, dIdt, dRdt))
    })
}


generate.SIR.model <- function(data=NULL, geo.loc="Hubei",
				t0=NULL,t1=NULL, deltaT=NULL,
				tfinal=90,
				fatality.rate = 0.02,
				tot.population=1400000000,
				staticPlt=TRUE, interactiveFig=FALSE, add.extras=FALSE) {
#' function to generate a simple SIR (Susceptible-Infected-Recovered) model based on the actual data of the coivd19 cases
#'
#' @param  data  time series dataset to consider
#' @param  geo.loc  country/region to analyze
#' @param  t0  initial period of time for data consideration
#' @param  t1  final period of time for data consideration
#' @param  deltaT interval period of time from t0, ie. number of days to consider since t0
#' @param  tfinal  total number of days
#' @param  fatality.rate  rate of causality, deafault value of 2 percent
#' @param  tot.population  total population of the country/region
#' @param  staticPlt  optional flag to activate/deactive plotting of the data and the SIR model generated
#' @param  interactiveFig  optional flag to activate/deactive the generation of an interactive plot of the data and the SIR model generated
#' @param  add.extras  boolean flag to add extra indicators, such as, the "force of infection" and time derivatives
#'
#' @importFrom  stats  optim setNames
#' @importFrom  deSolve  ode
#'
#' @export
#'
#' @examples
#' data <- covid19.data("ts-confirmed")
#' generate.SIR.model(data,"Hubei", t0=1,t1=15)
#' generate.SIR.model(data,"Germany",tot.population=83149300)
#' generate.SIR.model(data,"Uruguay", tot.population=3500000)
#' generate.SIR.model(data,"Canada", tot.population=37590000, add.extras=TRUE)
#'

	# DISCLAIMER // EXPERIMENTAL FEATURES
	header("#")
	message("This is an experimental feature, being currently under active development!")
	message("Please check the development version of the package for the latest updates on it")
	header("#")


	# check that the data is time series data
	chk.TS.data(data,xtp=TRUE)

	# get actual data from indicated region...
	pot.Infected <- preProcessingData(data,geo.loc)
	# eliminate entries without cases, ie equal 0
	#pot.Infected <- pot.Infected[cumsum(pot.Infected)!=0]
	#print(pot.Infected)


        # default values
        t0.default=30
        deltaT.default=25
        t1.default=t0.default+deltaT.default
	#
	colOffset <- 5
        ###########
	
	if (is.null(t0)) {
		# attempt to auto-detect significant growth....
		# will use a moving average of 2 days...
		#x <- movingFn(pot.Infected,period=2)
		x <- pot.Infected
		print(x)
		l1 <- length(x)
		l2 <- l1-1
		threashold <- 10
		t0 <- min(which(cumsum( (x[2:l1]-x[1:l2]) > threashold) != 0),na.rm=TRUE)
		print(t0)
		if (!is.null(t1)) {
			if (t1>t0) {
				Infected <- pot.Infected[t0:min(t1,l1)]
			} else {
				Infected <- pot.Infected[t0:min(t0+deltaT.default,l1)]
			}
		} else if (!is.null(deltaT)) {
				Infected <- pot.Infected[t0:min(t0+deltaT,l1)]
			} else {
				Infected <- pot.Infected[t0:min(t0+deltaT.default,l1)]
			}
		#print(Infected)
	} else if (!is.null(t1)) {
			# user indicated t0 & t1
			Infected <- pot.Infected[t0:t1]
		} else if (!is.null(deltaT)) {
				# user indicated t0 & deltaT
				Infected <- pot.Infected[t0:(t0+deltaT)]
			} else {
				Infected <- pot.Infected
			}


#	if (is.null(t0) & is.null(t1) & is.null(deltaT)) {
#		t0 <- t0.default
#		t1 <- t1.default
#	}

	print(Infected)


	###########
	header('-',"Parameters used to create model ")
	header("",paste0('\t',"Region: ",toupper(geo.loc)))
	header("",paste0('\t',"Time interval to consider: t0=",t0," - t1=",t1," ; tfinal=",tfinal))
	header("",paste0('\t\t',"t0: ",names(data)[t0+colOffset]," -- t1: ",names(data)[t1+colOffset]))
	header("",paste0('\t',"Number of days considered for initial guess: ",length(Infected)))
	header("",paste0('\t',"Fatality rate: ",fatality.rate))
	header("",paste0('\t',"Population of the region: ",tot.population))
	header('-')
	###########

	#print(Infected)

	# total population of the region
	# N <<- tot.population
	# needs to be a global variable for the ODE solver
	# will enforce a new environment
	.SIR.model.env <- new.env()
	assign("N",tot.population, envir = .SIR.model.env)	#.GlobalEnv)

	#### TEST CASES ####
	###### MAINLAND CHINA #####
	#Infected <- c(45, 62, 121, 198, 291, 440, 571, 830, 1287, 1975, 2744, 4515, 5974, 7711, 9692, 11791, 14380, 17205, 20440)
	#N <- 1400000000 # population of mainland china
	###### GERMANY #######
	#Infected <- c(16, 18, 21, 26, 53, 66, 117, 150, 188, 240, 349, 534, 684, 847, 1110, 1458, 1881, 2364, 3057, 3787, 4826, 5999)
	#N <- 83149300 # population of Germany acc. to Destatis
	#######################


	# Determine nbr of days 
	Day <- 1:(length(Infected))

        
	loadLibrary("deSolve")
	init.cond <- c(S = .SIR.model.env$N-Infected[1], I = Infected[1], R = 0)

	# define RSS fn to measure deviation of the model from data....
	RSS <- function(parameters) {
		# include N as a parameter for the ODE
		parameters <- c(parameters,.SIR.model.env$N)
		names(parameters) <- c("beta", "gamma","N")
		out <- ode(y = init.cond, times = Day, func = simple.SIR.ODE, parms = parameters)
		fit <- out[ , 3]
		sum((Infected - fit)^2)
	}
	#######
 

	Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", lower = c(0, 0), upper = c(1, 1)) # optimize with some sensible conditions
	print(Opt$message)
	## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
 
	Opt_par <- setNames(Opt$par, c("beta", "gamma"))
	print(Opt_par)
	##      beta     gamma 
	## 0.6746089 0.3253912

	# add N as a parameter for the ODE...
	Opt_par <- c(Opt_par,.SIR.model.env$N)
	names(Opt_par)[3] <- "N"

	# definte integration interval... 
	time <- 1:tfinal # time in days
	fit <- data.frame(ode(y = init.cond, times = time, func = simple.SIR.ODE, parms = Opt_par))

	# R_naught
	R0 <- setNames(Opt_par["beta"] / Opt_par["gamma"], "R0")
	header("",paste("R0 =",R0))
 
	# height of pandemic
	max.I <- max(fit$I)
	max.I.time <- fit$time[fit$I==max.I]
	header("",paste("Max nbr of infected:", round(max.I,2), " (",round((max.I/tot.population)*100,2),"%)"))
	#print(fit[fit$I == max(fit$I), "I", drop = FALSE]) 


	# Max number of casualties
	header("",paste("Max nbr of casualties, assuming ", paste0(round(fatality.rate*100,2),"% fatality rate:"),
		round(max.I*fatality.rate,2)))
	header("",paste("Max reached at day :", max.I.time, '==> ',as.Date(names(data)[t0+colOffset])+max.I.time))

	header("=")

	# group fit and data in an object to be returned
	SIR.model <- list(Infected=Infected, model=fit, params=list(beta=Opt_par["beta"],gamma=Opt_par["gamma"],R0=R0))

	# display plots if requested
	if (staticPlt | interactiveFig)
		plt.SIR.model(SIR.model,geo.loc, interactiveFig, add.extras=add.extras)

	#class(SIR.model) <- "SIR.model"
	return(SIR.model)
}

#######################################################################
#######################################################################

plt.SIR.model <- function(SIR.model, geo.loc="",
				interactiveFig=FALSE, fileName=NULL, interactive.display=TRUE,
				add.extras=TRUE) {
#' function to plot the results from the SIR model fn
#'
#' @param  SIR.model model resulting from the generate.SIR.model() fn
#' @param  geo.loc  optional string to specify geographical location
#' @param  interactiveFig  optional flag to activate interactive plot
#' @param  fileName  file where to save the HTML version of the interactive figure
#' @param  interactive.display  boolean flag to indicate whether the interactive plot will be displayed (pushed) to your browser
#' @param  add.extras  boolean flag to add extra indicators, such as, the "force of infection" and time derivatives
#'
#' @export plt.SIR.model
#'
#' @importFrom  graphics  matplot matlines title legend points
#' @importFrom  plotly  plot_ly %>% add_trace as_widget subplot
#' @importFrom  htmlwidgets  saveWidget
#'

	# recover data from model
	Infected <- SIR.model$Infected
	fit <- SIR.model$model

        # Determine nbr of days 
        Day <- 1:(length(Infected))


        ### preserve user graphical env.
        # save the state of par() before running the code
        oldpar <- par(no.readonly = TRUE)
        # restore the previous state after the fn is done, even if it fails, so the user environment is not altered
        on.exit(par(oldpar))
        #########

        par(mfrow = c(2, 2))
        plot(Day, Infected, type ="b")
	if (add.extras) lines(Day[-1],diff(Infected), lty=2)
        plot(Day, Infected, log = "y")
        abline(lm(log10(Infected) ~ Day))
	if (add.extras) lines(Day[-1],log10(diff(Infected)), lty=2)

        title(paste("Confirmed Cases 2019-nCoV:",toupper(geo.loc)), outer = TRUE, line = -2)

        col <- c("blue","red","green")

        matplot(fit$time, fit[ , 2:4], type = "l", xlab = "Day", ylab = "Number of subjects", lwd = 2, lty = 1, col = col)
	if (add.extras) {
			matlines(fit$time[-1], sapply(fit[ , 2:4],diff), type = "l", lwd = 1, lty = 2, col = col)
			matlines(fit$time, fit[,3]**SIR.model$params$beta, type = "l", lwd = 0.75, lty = 6, col = "purple")
	}
        matplot(fit$time, fit[ , 2:4], type = "l", xlab = "Day", ylab = "Number of subjects", lwd = 2, lty = 1, col = col, log = "y")
	if (add.extras) {
		matlines(fit$time[-1], sapply(fit[ , 2:4],diff), type = "l", lwd = 1, lty = 2, col = col, log="y")
		matlines(fit$time, fit[,3]**SIR.model$params$beta, type = "l", lwd = 0.75, lty = 6, col = "purple", log="y")
	}
        ## Warning in xy.coords(x, y, xlabel, ylabel, log = log): 1 y value <= 0
        ## omitted from logarithmic plot

        points(Day, Infected)
        legend("bottomright", c("Susceptible", "Infected", "Recovered"), lty = 1, lwd = 2, col = col, inset = 0.05)
        title(paste("SIR model 2019-nCoV:", toupper(geo.loc)), outer = TRUE, line = -22)
        #axis.Date(1,as.Date(names(data)[colOffset:ncol(data)]))


        ### INTERACTIVE PLOTS
        if (interactiveFig) {
                # load/check plotly
                loadLibrary("plotly")

                # define interactive figure/plot
		model.ifig <- plot_ly(data=fit, x = ~fit[,1])

		length(Infected) <- length(fit[,1])

		loc.data <- cbind(Infected,fit[,1:4], Force=(fit[,3]*SIR.model$params$beta) )
		#print(loc.data)

		#model.ifig <- model.ifig %>% add_trace(y = ~Infected, name="Actual data", type='scatter', mode='markers', visible=TRUE)
		# add traces
		model.ifig <- add.N.traces(model.ifig,loc.data, c("data","Susceptible", "Infected", "Recovered","Force"), vis=TRUE)

		# extra traces for activating log-scale
		model.ifig <- add.N.traces(model.ifig, log10(loc.data), c("data","Susceptible", "Infected", "Recovered", "Force"), vis=FALSE)

		# log-scale menu based on nbr of traces...
                        
		updatemenues <- log.sc.setup(5)

                # add a menu for switching log/linear scale
                model.ifig <- model.ifig %>% layout(updatemenus=updatemenues)

		### DERIVS ###
		derivs <- cbind(time=fit$time[-1],as.data.frame(lapply(fit[,2:4],diff)) ,force=diff(loc.data$Force))
		#print(str(derivs))
		derivs.ifig <- plot_ly(data=derivs, x=derivs$time)
		derivs.data <- cbind(diff(Infected),derivs)
		#print(str(derivs.data))
		derivs.ifig <- add.N.traces(derivs.ifig, derivs.data, c("Deriv.data","Deriv.Susceptible", "Deriv.Infected", "Deriv.Recovered","Deriv.Force"), vis=TRUE)
		derivs.ifig <- add.N.traces(derivs.ifig, log10(derivs.data), c("Deriv.data","Deriv.Susceptible", "Deriv.Infected", "Deriv.Recovered","Deriv.Force"), vis=FALSE)
		updatemenues <- log.sc.setup(5)
		derivs.ifig <- derivs.ifig %>% layout(updatemenus=updatemenues)
		###
		ifigs <- subplot(list(model.ifig, derivs.ifig), nrows = 2, shareX = TRUE, titleX = TRUE)
		#print(ifigs)


		# activate interactive figure
		if (interactive.display) {
			print(model.ifig)
			print(ifigs)
		}

		#return(model.ifig)

		if (!is.null(fileName)) {
			FileName <- paste0(fileName,".html")
			# informing where the plot is going to be saved
			message("Saving interactive plot in ", FileName)
			htmlwidgets::saveWidget(as_widget(model.ifig), FileName)
		}

		if (add.extras) {
			return(ifigs)
		} else {
			return(model.ifig)
		}
	}
}

#######################################################################


sweep.SIR.models <- function(data=NULL, geo.loc="Hubei",
		t0_range=15:20,
			t1=NULL, deltaT=NULL,
			tfinal=90,
			fatality.rate = 0.02,
			tot.population=1400000000) {
#'
#' function to perform a sweep of models and generate values of R0
#'
#' @param  data  time series dataset to consider
#' @param  geo.loc  country/region to analyze
#' @param  t0_range  range of initial date for data consideration
#' @param  t1  final period of time for data consideration
#' @param  deltaT interval period of time from t0, ie. number of days to consider since t0
#' @param  tfinal  total number of days
#' @param  fatality.rate  rate of causality, deafault value of 2 percent
#' @param  tot.population  total population of the country/region
#'
#' @export
#'
#' @examples
#' # read TimeSeries data
#' TS.data <- covid19.data("TS-confirmed")
#' # select a location of interest, eg. France
#' # France has many entries, just pick "la France"
#' France.data <- TS.data[ (TS.data$Country.Region == "France") & (TS.data$Province.State == ""),]
#' # sweep values of R0 based on range of dates to consider for the model
#' ranges <- 15:20
#' deltaT <- 20
#' params_sweep <- sweep.SIR.models(data=France.data,geo.loc="France", t0_range=ranges, deltaT=deltaT)
#' # obtain the R0 values from the parameters
#' R0s <- unlist(params_sweep["R0",])
#' # nbr of infected cases
#' FR.infs<- preProcessingData(France.data,"France")
#' # average per range
#' # define ranges
#' lst.ranges <- lapply(ranges, function(x) x:(x+deltaT))
#' # compute averages
#' avg.FR.infs <- lapply(lst.ranges, function(x) mean(FR.infs[x]))
#' # plots
#' plot(R0s, type='b')
#' # plot vs average number of infected cases
#' plot(avg.FR.infs, R0s, type='b')
#'

        wrapperFn <- function(t0) {
        # wrapper fn to generate.SIR.model

                #m1 <- generate.SIR.model(data,geo.loc,t0,t1,deltaT=20 ,fatality.rate,tot.population,
                #       staticPlt=FALSE,interactiveFig=FALSE, add.extras=FALSE)
                m1 <- generate.SIR.model (data=data, geo.loc=geo.loc, t0, t1=t1, deltaT=deltaT, tfinal=tfinal,
						fatality.rate=fatality.rate, tot.population=tot.population,
						interactiveFig=FALSE, staticPlt=FALSE)
                return(m1$params)
        }


        models <- sapply(t0_range,wrapperFn)
        #models <- mapply(generate.SIR.model, data,geo.loc,t0_range,t1,deltaT,tfinal,
        #                       fatality.rate, tot.population,
        #                       interactiveFig=FALSE, staticPlt=FALSE)

        return(models)
}

#######################################################################

Try the covid19.analytics package in your browser

Any scripts or data that you put into this service are public.

covid19.analytics documentation built on Aug. 18, 2022, 9:06 a.m.