R/HistDAWass-package.R

#https://github.com/variani/pckdev/wiki/Documenting-with-roxygen2
#http://stackoverflow.com/questions/7356120/how-to-properly-document-s4-methods-using-roxygen2

#' Histogram-Valued Data Analysis
#' 
#' We consider histogram-valued data, i.e., data described by univariate histograms. The
#' methods and the basic statistics for histogram-valued data are mainly based
#' on the L2 Wasserstein metric between distributions, i.e., a Euclidean metric
#' between quantile functions. The package contains unsupervised classification techniques, 
#' least square regression and tools for histrogram-valued data and for histogram time series.
#'
#' 
#' \tabular{ll}{ Package: \tab HistDAWass\cr Type: \tab Package\cr Version:
#' \tab 0.1.1\cr Date: \tab 2014-09-17\cr License: \tab GPL (>=2)\cr Depends:
#' \tab methods\cr } ~~ An overview of how to use the package, including the
#' most important functions ~~
#' 
#' @name HistDAWass-package
#' @aliases HistDAWass-package HistDAWass
#' @docType package
#' @author Antonio Irpino <[email protected]@unicampania.it>
#' @references
#' Irpino, A., Verde, R. (2015) \emph{Basic
#' statistics for distributional symbolic variables: a new metric-based
#' approach}, Advances in Data Analysis and Classification, Volume 9, Issue 2, pp 143--175.
#' DOI \url{https://doi.org/10.1007/s11634-014-0176-4}\cr
#' 
#' @import ggplot2 grid
#' @keywords package
#' @examples
#' 
#' # Generating a list of distributions
#' a<-vector("list",4)
#' a[[1]]<-distributionH(x=c(80,100,120,135,150,165,180,200,240),
#'                       p=c(0,0.025,0.1,0.275,0.525,0.725,0.887,0.975,1))
#' a[[2]]<-distributionH(x=c(80,100,120,135,150,165,180,195,210,240),
#'                       p=c(0,0.013,0.101,0.255,0.508,0.718,0.895,0.961,0.987,1))
#' a[[3]]<-distributionH(x=c(95,110,125,140,155,170,185,200,215,230,245),
#'                       p=c(0,0.012,0.041,0.154,0.36,0.595,0.781,0.929,0.972,0.992,1))
#' a[[4]]<-distributionH(x=c(105,120,135,150,165,180,195,210,225,240,260),
#'                       p=c(0,0.009,0.035,0.081,0.186,0.385,0.633,0.832,0.932,0.977,1))
#' # Generating a list of names of observations
#' namerows<-list( 'u1'  , 'u2') 
#' # Generating a list of names of variables
#' namevars<-list( 'Var_1'  , 'Var_2') 
#' # creating the MatH  
#' Mat_of_distributions<-MatH(x=a, nrows = 2, ncols = 2, 
#'                             rownames=namerows, varnames=namevars, by.row=FALSE )
#' 
NULL


#' Full Ozone dataset for Histogram data analysis
#' 
#' The dataset contains MatH (matrix of histogram-valued data) object This data
#' set list 78 stations located in the USA recording four variables, without
#' missing data.
#' 
#' 
#' @name OzoneFull
#' @docType data
#' @format a \code{MatH} istance, 1 row per station.
#' @author Antonio Irpino, 2014-10-05
#' @source http://java.epa.gov/castnet/epa_jsp/prepackageddata.jsp
#' ftp://ftp.epa.gov/castnet/data/metdata.zip
NULL



#' Complete Ozone dataset for Histogram data analysis
#' 
#' The dataset contains MatH (matrix of histogram-valued data) object This data
#' set list 84 stations located in the USA recording four variables. Some
#' stations contains missing data.
#' 
#' 
#' @name OzoneH
#' @docType data
#' @format a \code{MatH} istance, 1 row per station.
#' @author Antonio Irpino, 2014-10-05
#' @source http://java.epa.gov/castnet/epa_jsp/prepackageddata.jsp
#' ftp://ftp.epa.gov/castnet/data/metdata.zip
NULL

#' Blood dataset for Histogram data analysis
#' 
#' The dataset contains a MatH (matrix of histogram-valued data) object This
#' data set list 14 groups of patients described by 3 variables.
#' 
#' 
#' @name BLOOD
#' @docType data
#' @format a \code{MatH} istance, 1 row per group.
#' @author Antonio Irpino, 2014-10-05
#' @source Billard L. and Diday E. (2006). Symbolic Data Analysis: Conceptual
#' Statistics and Data Mining, Wiley.
NULL

#' Blood dataset from Brito P. for Histogram data analysis
#' 
#' The dataset contains a MatH (matrix of histogram-valued data) object This
#' data set list 10 patients described by 2 variables.
#' 
#' 
#' @name BloodBRITO
#' @docType data
#' @format a \code{MatH} istance, 1 row per patient.
#' @author Antonio Irpino, 2014-10-05
#' @source Dias, S. and Brito P. Distribution and Symmetric Distribution
#' Regression Model for Histogram-Valued Variables, ArXiv, arXiv:1303.6199
#' [stat.ME]
NULL

#' Age pyramids of all the countries of the World in 2014
#' 
#' The dataset contains a MatH (matrix of histogram-valued data) object, with three 
#' hisogram-valued variables, the 5-years age (relative frequencies) distribution of all the population, 
#' of the male and of the female population of 228 countries of the World. The  first row is the World data.
#' Thus it contains 229 rows(228 countries plus the World) and 3 variables: "Both.Sexes.Population", 
#' "Male.Population", "Female.Population" 
#' 
#' 
#' @name Age_Pyramids_2014
#' @docType data
#' @format a \code{MatH} object, a matrix of distributions.
#' @author Antonio Irpino, 2014-10-05
#' @source United States Census Bureau \url{https://www.census.gov/data.html}
NULL

#' Agronomique data
#' 
#' A dataset with the distributions of marginal costs of farms in 22 France regions. It contains four 
#' histogram variables: "Y_TSC" (Total costs of a farm),  "X_Wheat" (Costs for Wheat), "X_Pig" (Costs for Pigs)
#' "X_Cmilk" (Costs for Cow Milk)
#' 
#' @name Agronomique
#' @docType data
#' @format a \code{MatH} object, a matrix of distributions.
#' @author Antonio Irpino, 2014-10-05
#' @source Rosanna Verde, Antonio Irpino, Second University of Naples; Dominique Desbois, UMR Economie
#' publique, INRA-AgroParisTech, How to cope with modelling and privacy concerns? A regression model and a visualization tool for
#' aggregated data,  Conference of European Statistics Stakeholders,
#' Rome, November, 24-25,2014
NULL

#' A monthly climatic dataset of China
#' 
#' A dataset with the distributions of some climatic variables collected for each month in 60 stations of China.
#' The collected variables are 168 i.e. 14 climatic variables observed for 12 months. The 14 variables are the following:
#'  mean station pressure (mb), mean temperature, mean maximum temperature, mean minimum temperature,
#'   total precipitation (mm), sunshine duration (h), mean cloud amount (percentage of sky cover),
#'   mean relative humidity (%), snow days (days with snow cover), dominant wind direction (degrees),
#'    mean wind speed (m/s), dominant wind frequency (%), extreme maximum temperature (C),
#'    extreme minimum temperature. 
#' Use the command \code{ get.MatH.main.info(China_Month)} for rapid info.
#'
#' @name China_Month
#' @docType data
#' @format a \code{MatH} object, a matrix of distributions.
#' @author Antonio Irpino, 2014-10-05
#' @source raw data are available here: \url{http://cdiac.ess-dive.lbl.gov/ftp/tr055/}
NULL

#' A seasonal climatic dataset of China
#' 
#' A dataset with the distributions of some climatic variables collected for each season in 60 stations of China.
#' The collected variables are 56 i.e. 14 climatic variables observed for 4 seasons. The 14 variables are the following:
#'  mean station pressure (mb), mean temperature, mean maximum temperature, mean minimum temperature,
#'   total precipitation (mm), sunshine duration (h), mean cloud amount (percentage of sky cover),
#'   mean relative humidity (%), snow days (days with snow cover), dominant wind direction (degrees),
#'    mean wind speed (m/s), dominant wind frequency (%), extreme maximum temperature (C),
#'    extreme minimum temperature. 
#' Use the command \code{ get.MatH.main.info(China_Seas)} for rapid info.
#'
#' @name China_Seas
#' @docType data
#' @format a \code{MatH} object, a matrix of distributions.
#' @author Antonio Irpino, 2014-10-05
#' @source raw data are available here: \url{http://cdiac.ess-dive.lbl.gov/ftp/tr055/}. 
#' Climate Data Bases of the People's Republic of China 1841-1988 (TR055)
#' DOI: 10.3334/CDIAC/cli.tr055
NULL

#' Stations coordinates of China_Month and China_Seas datasets
#' 
#' A dataset containing the geographical coordinates of stations described in China_Month and China_Seas datasets
#'
#' 
#' @name stations_coordinates
#' @docType data
#' @format a data.frame
#' @author Antonio Irpino, 2014-10-05
#' @source raw data are available here: \url{http://cdiac.ess-dive.lbl.gov/ftp/tr055/}. 
#' Climate Data Bases of the People's Republic of China 1841-1988 (TR055)
#' DOI: 10.3334/CDIAC/cli.tr055
NULL

#' A histogram-valued dataset of returns
#' 
#' A histogram-valued dataset of returns of dollar vs yen change rates
#' 
#' @name RetHTS
#' @docType data
#' @format a \code{MatH} object, a matrix of distributions.
#' @author Antonio Irpino, 2014-10-05
NULL















 






 

Try the HistDAWass package in your browser

Any scripts or data that you put into this service are public.

HistDAWass documentation built on March 20, 2018, 5:04 p.m.