# R/list_datasets.R

# Defines functions: list_datasets

# Documented in: list_datasets

#' Create a list containing metadata about available datasets
#'
#' Returns a hard-coded catalogue of datasets hosted on the UCI Machine
#' Learning Repository. The function takes no arguments and performs no
#' downloads; it only describes where each dataset lives and how its file is
#' laid out.
#'
#' @return An unnamed list with one element per dataset. Each element is itself
#'   a named list with the following fields (NOTE(review): the field meanings
#'   are inferred from the stored values; confirm against the code that
#'   consumes this list):
#'   \describe{
#'     \item{dataset_name}{Human-readable dataset title.}
#'     \item{short_name}{Short lowercase identifier for the dataset.}
#'     \item{url}{URL of the directory holding the dataset, ending in
#'       \code{"/"}.}
#'     \item{file}{Name of the file located under \code{url}.}
#'     \item{zipfile}{\code{NULL}, or (when \code{file} is a zip archive)
#'       what appears to be the path of the data file inside that archive.}
#'     \item{delim}{Delimiter specification: \code{"csv"}, \code{"tsv"},
#'       or a literal delimiter such as \code{";"} or \code{" "}.}
#'     \item{col_names}{Either a character vector of column names, or a
#'       single \code{TRUE}/\code{FALSE} flag.}
#'     \item{default_target}{Name of the default target column, or
#'       \code{NULL} when none is declared.}
#'     \item{null_char}{String marking missing values in the file, or
#'       \code{NULL}.}
#'     \item{default_task}{Either \code{"classif"} or \code{"regr"}.}
#'     \item{exclude_cols}{Only present for some datasets: character vector
#'       of columns flagged for exclusion.}
#'   }
#'
#' @examples
#' ds_list <- list_datasets()
#'
#' @export
list_datasets <- function() {
  # Every dataset in the catalogue is served from this host.
  list(
    list(dataset_name = "Abalone",
         short_name = "abalone",
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/",
         file = "abalone.data",
         zipfile = NULL,
         delim = "csv",
         col_names = c("sex", "length", "diameter", "height", "whole_weight",
                       "shucked_weight", "viscera_weight", "shell_weight",
                       "rings"),
         default_target = "rings",
         null_char = NULL,
         default_task = "classif"),
    list(dataset_name = "Adult",
         short_name = "adult",
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/",
         file = "adult.data",
         zipfile = NULL,
         delim = "csv",
         # "relationship" was previously misspelled as "realtionship".
         col_names = c("age", "workclass", "fnlwgt", "education",
                       "education_num", "marital_status", "occupation",
                       "relationship", "race", "sex", "capital_gain",
                       "capital_loss", "hours_per_week", "native_country",
                       "income"),
         default_target = "income",
         null_char = NULL,
         default_task = "classif"),
    list(dataset_name = "BlogFeedback",
         short_name = "blog",
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00304/",
         file = "BlogFeedback.zip",
         zipfile = "blogData_train.csv",
         delim = "csv",
         col_names = FALSE,
         default_target = NULL,
         null_char = NULL,
         default_task = "regr"),
    list(dataset_name = "Facebook comment volume",
         short_name = "facebook",
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00363/",
         file = "Dataset.zip",
         zipfile = "Dataset/Training/Features_Variant_1.csv",
         delim = "csv",
         col_names = c("likes", "checkins", "talking_about", "category",
                       "derived1", "derived2", "derived3", "derived4",
                       "derived5", "derived6", "derived7", "derived8",
                       "derived9", "derived10", "derived11", "derived12",
                       "derived13", "derived14", "derived15", "derived16",
                       "derived17", "derived18", "derived19", "derived20",
                       "derived21", "derived22", "derived23", "derived24",
                       "derived25", "cc1", "cc2", "cc3", "cc4", "cc5",
                       "base_time", "post_length", "post_shares",
                       "post_promo_status", "h_local", "post_pub_sun",
                       "post_pub_mon", "post_pub_tue", "post_pub_wed",
                       "post_pub_thu", "post_pub_fri", "post_pub_sat",
                       "base_date_sun", "base_date_mon", "base_date_tue",
                       "base_date_wed", "base_date_thu", "base_date_fri",
                       "base_date_sat", "target"),
         default_target = "target",
         null_char = NULL,
         default_task = "regr"),
    list(dataset_name = "Communities and Crime",
         short_name = "crime",
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/communities/",
         file = "communities.data",
         zipfile = NULL,
         delim = "csv",
         col_names = c("state", "county", "community", "communityname", "fold",
                       "population", "householdsize", "racepctblack", "racePctWhite", "racePctAsian", "racePctHisp", "agePct12t21",
                       "agePct12t29", "agePct16t24", "agePct65up", "numbUrban", "pctUrban", "medIncome", "pctWWage", "pctWFarmSelf",
                       "pctWInvInc", "pctWSocSec", "pctWPubAsst", "pctWRetire", "medFamInc", "perCapInc", "whitePerCap",
                       "blackPerCap", "indianPerCap", "AsianPerCap", "OtherPerCap", "HispPerCap", "NumUnderPov", "PctPopUnderPov",
                       "PctLess9thGrade", "PctNotHSGrad", "PctBSorMore", "PctUnemployed", "PctEmploy", "PctEmplManu",
                       "PctEmplProfServ", "PctOccupManu", "PctOccupMgmtProf", "MalePctDivorce", "MalePctNevMarr", "FemalePctDiv",
                       "TotalPctDiv", "PersPerFam", "PctFam2Par", "PctKids2Par", "PctYoungKids2Par", "PctTeen2Par",
                       "PctWorkMomYoungKids", "PctWorkMom", "NumIlleg", "PctIlleg", "NumImmig", "PctImmigRecent", "PctImmigRec5",
                       "PctImmigRec8", "PctImmigRec10", "PctRecentImmig", "PctRecImmig5", "PctRecImmig8", "PctRecImmig10",
                       "PctSpeakEnglOnly", "PctNotSpeakEnglWell", "PctLargHouseFam", "PctLargHouseOccup", "PersPerOccupHous",
                       "PersPerOwnOccHous", "PersPerRentOccHous", "PctPersOwnOccup", "PctPersDenseHous", "PctHousLess3BR",
                       "MedNumBR", "HousVacant", "PctHousOccup", "PctHousOwnOcc", "PctVacantBoarded", "PctVacMore6Mos",
                       "MedYrHousBuilt", "PctHousNoPhone", "PctWOFullPlumb", "OwnOccLowQuart", "OwnOccMedVal", "OwnOccHiQuart",
                       "RentLowQ", "RentMedian", "RentHighQ", "MedRent", "MedRentPctHousInc", "MedOwnCostPctInc",
                       "MedOwnCostPctIncNoMtg", "NumInShelters", "NumStreet", "PctForeignBorn", "PctBornSameState",
                       "PctSameHouse85", "PctSameCity85", "PctSameState85", "LemasSwornFT", "LemasSwFTPerPop", "LemasSwFTFieldOps",
                       "LemasSwFTFieldPerPop", "LemasTotalReq", "LemasTotReqPerPop", "PolicReqPerOffic", "PolicPerPop",
                       "RacialMatchCommPol", "PctPolicWhite", "PctPolicBlack", "PctPolicHisp", "PctPolicAsian", "PctPolicMinor",
                       "OfficAssgnDrugUnits", "NumKindsDrugsSeiz", "PolicAveOTWorked", "LandArea", "PopDens", "PctUsePubTrans",
                       "PolicCars", "PolicOperBudg", "LemasPctPolicOnPatr", "LemasGangUnitDeploy", "LemasPctOfficDrugUn",
                       "PolicBudgPerPop", "ViolentCrimesPerPop"),
         default_target = "ViolentCrimesPerPop",
         null_char = "?",
         # The only entry that carries `exclude_cols`; it lists the first five
         # columns of `col_names`.
         exclude_cols = c("state", "county", "community", "communityname", "fold"),
         default_task = "regr"),
    list(dataset_name = "3D Road Network (North Jutland, Denmark)",
         short_name = "network3d",
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00246/",
         file = "3D_spatial_network.txt",
         zipfile = NULL,
         delim = "csv",
         col_names = FALSE,
         default_target = NULL,
         null_char = NULL,
         default_task = "regr"),
    list(dataset_name = "Airfoil Self-Noise",
         short_name = "airfoil",
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00291/",
         file = "airfoil_self_noise.dat",
         zipfile = NULL,
         delim = "tsv",
         col_names = c("frequency", "angle", "chord_length", "velocity",
                       "thickness", "sound_pressure"),
         default_target = "sound_pressure",
         null_char = NULL,
         default_task = "regr"),
    list(dataset_name = "Air Quality",
         short_name = "airquality",
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00360/",
         file = "AirQualityUCI.zip",
         zipfile = "AirQualityUCI.csv",
         delim = ";",
         col_names = TRUE,
         # "T" is a column name (a string), not the logical shorthand `T`.
         default_target = "T",
         null_char = NULL,
         default_task = "regr"),
    list(dataset_name = "Relative location of CT slices on axial axis",
         short_name = "ct",
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00206/",
         file = "slice_localization_data.zip",
         zipfile = "slice_localization_data.csv",
         delim = "csv",
         col_names = TRUE,
         default_target = NULL,
         null_char = NULL,
         default_task = "regr"),
    list(dataset_name = "Appliances energy prediction",
         short_name = "appliances",
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00374/",
         file = "energydata_complete.csv",
         zipfile = NULL,
         delim = "csv",
         col_names = TRUE,
         default_target = "Appliances",
         null_char = NULL,
         default_task = "regr"),
    list(dataset_name = "Statlog (German Credit Data)",
         short_name = "german",
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/",
         file = "german.data",
         zipfile = NULL,
         delim = " ",
         col_names = c("Status_existing_chkg_acct", "Duration_months", "Credit_history", "Purpose", "Credit_amount",
                       "Savings_acct", "Present_employment_since", "Payment_to_disposable_income", "Marital_status",
                       "Other_applicants", "Present_residence_since", "Property", "Age_years",
                       "Other_installment_plans", "Housing", "Existing_credits_this_bank", "Job",
                       "Number_of_dependents", "Telephone", "Foreign_worker", "Performance"),
         default_target = "Performance",
         null_char = NULL,
         default_task = "classif"),
    list(dataset_name = "Statlog (Australian Credit Approval)",
         short_name = "australian",
         # Uses https like every other entry (was plain http).
         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/australian/",
         file = "australian.dat",
         zipfile = NULL,
         delim = " ",
         col_names = c("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9",
                       "A10", "A11", "A12", "A13", "A14", "A15"),
         default_target = "A15",
         null_char = NULL,
         default_task = "classif")
  )
}
# Prometheus77/ucimlr documentation built on Jan. 16, 2020, 1:43 p.m.