#' Create a list containing metadata about available datasets
#'
#' Returns a hard-coded catalogue of dataset descriptors. Every descriptor
#' points at a location under \code{archive.ics.uci.edu}. The function takes
#' no arguments and performs no I/O: it only builds and returns the list.
#'
#' @return An unnamed list with one element per dataset. Each element is a
#'   named list with the following fields:
#'   \describe{
#'     \item{dataset_name}{Human-readable title of the dataset.}
#'     \item{short_name}{Short lower-case identifier of the dataset.}
#'     \item{url}{Base URL of the directory holding the data, ending in
#'       \code{"/"}.}
#'     \item{file}{Name of the file below \code{url}; either a plain data
#'       file or a \code{.zip} archive.}
#'     \item{zipfile}{\code{NULL} for plain data files, otherwise the path of
#'       a file inside the archive named by \code{file} (presumably the
#'       member to extract; confirm against the loader).}
#'     \item{delim}{One of \code{"csv"}, \code{"tsv"}, \code{";"} or
#'       \code{" "} (presumably interpreted by the loader as the field
#'       separator).}
#'     \item{col_names}{Either a character vector of column names, or
#'       \code{TRUE}/\code{FALSE} (presumably telling the reader whether the
#'       file has a header row; confirm against the loader).}
#'     \item{default_target}{Name of a column, or \code{NULL} when no default
#'       is defined.}
#'     \item{null_char}{\code{NULL}, or a string (presumably the marker used
#'       for missing values in the raw file).}
#'     \item{default_task}{Either \code{"classif"} or \code{"regr"}.}
#'     \item{exclude_cols}{Optional; only present on the
#'       \code{"crime"} entry. Character vector of column names.}
#'   }
#'
#' @examples
#' ds_list <- list_datasets()
#'
#' @export
list_datasets <- function() {
  list(
    list(
      dataset_name = "Abalone",
      short_name = "abalone",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/",
      file = "abalone.data",
      zipfile = NULL,
      delim = "csv",
      col_names = c(
        "sex", "length", "diameter", "height", "whole_weight",
        "shucked_weight", "viscera_weight", "shell_weight", "rings"
      ),
      default_target = "rings",
      null_char = NULL,
      default_task = "classif"
    ),
    list(
      dataset_name = "Adult",
      short_name = "adult",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/",
      file = "adult.data",
      zipfile = NULL,
      delim = "csv",
      col_names = c(
        "age", "workclass", "fnlwgt", "education", "education_num",
        "marital_status", "occupation", "relationship", "race", "sex",
        "capital_gain", "capital_loss", "hours_per_week", "native_country",
        "income"
      ),
      default_target = "income",
      null_char = NULL,
      default_task = "classif"
    ),
    list(
      dataset_name = "BlogFeedback",
      short_name = "blog",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00304/",
      file = "BlogFeedback.zip",
      zipfile = "blogData_train.csv",
      delim = "csv",
      col_names = FALSE,
      default_target = NULL,
      null_char = NULL,
      default_task = "regr"
    ),
    list(
      dataset_name = "Facebook comment volume",
      short_name = "facebook",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00363/",
      file = "Dataset.zip",
      zipfile = "Dataset/Training/Features_Variant_1.csv",
      delim = "csv",
      col_names = c(
        "likes", "checkins", "talking_about", "category",
        "derived1", "derived2", "derived3", "derived4", "derived5",
        "derived6", "derived7", "derived8", "derived9", "derived10",
        "derived11", "derived12", "derived13", "derived14", "derived15",
        "derived16", "derived17", "derived18", "derived19", "derived20",
        "derived21", "derived22", "derived23", "derived24", "derived25",
        "cc1", "cc2", "cc3", "cc4", "cc5",
        "base_time", "post_length", "post_shares", "post_promo_status",
        "h_local",
        "post_pub_sun", "post_pub_mon", "post_pub_tue", "post_pub_wed",
        "post_pub_thu", "post_pub_fri", "post_pub_sat",
        "base_date_sun", "base_date_mon", "base_date_tue", "base_date_wed",
        "base_date_thu", "base_date_fri", "base_date_sat",
        "target"
      ),
      default_target = "target",
      null_char = NULL,
      default_task = "regr"
    ),
    list(
      dataset_name = "Communities and Crime",
      short_name = "crime",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/communities/",
      file = "communities.data",
      zipfile = NULL,
      delim = "csv",
      col_names = c(
        "state", "county", "community", "communityname", "fold",
        "population", "householdsize", "racepctblack", "racePctWhite",
        "racePctAsian", "racePctHisp", "agePct12t21",
        "agePct12t29", "agePct16t24", "agePct65up", "numbUrban", "pctUrban",
        "medIncome", "pctWWage", "pctWFarmSelf",
        "pctWInvInc", "pctWSocSec", "pctWPubAsst", "pctWRetire", "medFamInc",
        "perCapInc", "whitePerCap",
        "blackPerCap", "indianPerCap", "AsianPerCap", "OtherPerCap",
        "HispPerCap", "NumUnderPov", "PctPopUnderPov",
        "PctLess9thGrade", "PctNotHSGrad", "PctBSorMore", "PctUnemployed",
        "PctEmploy", "PctEmplManu",
        "PctEmplProfServ", "PctOccupManu", "PctOccupMgmtProf",
        "MalePctDivorce", "MalePctNevMarr", "FemalePctDiv",
        "TotalPctDiv", "PersPerFam", "PctFam2Par", "PctKids2Par",
        "PctYoungKids2Par", "PctTeen2Par",
        "PctWorkMomYoungKids", "PctWorkMom", "NumIlleg", "PctIlleg",
        "NumImmig", "PctImmigRecent", "PctImmigRec5",
        "PctImmigRec8", "PctImmigRec10", "PctRecentImmig", "PctRecImmig5",
        "PctRecImmig8", "PctRecImmig10",
        "PctSpeakEnglOnly", "PctNotSpeakEnglWell", "PctLargHouseFam",
        "PctLargHouseOccup", "PersPerOccupHous",
        "PersPerOwnOccHous", "PersPerRentOccHous", "PctPersOwnOccup",
        "PctPersDenseHous", "PctHousLess3BR",
        "MedNumBR", "HousVacant", "PctHousOccup", "PctHousOwnOcc",
        "PctVacantBoarded", "PctVacMore6Mos",
        "MedYrHousBuilt", "PctHousNoPhone", "PctWOFullPlumb",
        "OwnOccLowQuart", "OwnOccMedVal", "OwnOccHiQuart",
        "RentLowQ", "RentMedian", "RentHighQ", "MedRent",
        "MedRentPctHousInc", "MedOwnCostPctInc",
        "MedOwnCostPctIncNoMtg", "NumInShelters", "NumStreet",
        "PctForeignBorn", "PctBornSameState",
        "PctSameHouse85", "PctSameCity85", "PctSameState85", "LemasSwornFT",
        "LemasSwFTPerPop", "LemasSwFTFieldOps",
        "LemasSwFTFieldPerPop", "LemasTotalReq", "LemasTotReqPerPop",
        "PolicReqPerOffic", "PolicPerPop",
        "RacialMatchCommPol", "PctPolicWhite", "PctPolicBlack",
        "PctPolicHisp", "PctPolicAsian", "PctPolicMinor",
        "OfficAssgnDrugUnits", "NumKindsDrugsSeiz", "PolicAveOTWorked",
        "LandArea", "PopDens", "PctUsePubTrans",
        "PolicCars", "PolicOperBudg", "LemasPctPolicOnPatr",
        "LemasGangUnitDeploy", "LemasPctOfficDrugUn",
        "PolicBudgPerPop", "ViolentCrimesPerPop"
      ),
      default_target = "ViolentCrimesPerPop",
      null_char = "?",
      # Only this entry carries `exclude_cols`; it lists the first five
      # entries of `col_names`.
      exclude_cols = c("state", "county", "community", "communityname", "fold"),
      default_task = "regr"
    ),
    list(
      dataset_name = "3D Road Network (North Jutland, Denmark)",
      short_name = "network3d",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00246/",
      file = "3D_spatial_network.txt",
      zipfile = NULL,
      delim = "csv",
      col_names = FALSE,
      default_target = NULL,
      null_char = NULL,
      default_task = "regr"
    ),
    list(
      dataset_name = "Airfoil Self-Noise",
      short_name = "airfoil",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00291/",
      file = "airfoil_self_noise.dat",
      zipfile = NULL,
      delim = "tsv",
      col_names = c(
        "frequency", "angle", "chord_length", "velocity", "thickness",
        "sound_pressure"
      ),
      default_target = "sound_pressure",
      null_char = NULL,
      default_task = "regr"
    ),
    list(
      dataset_name = "Air Quality",
      short_name = "airquality",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00360/",
      file = "AirQualityUCI.zip",
      zipfile = "AirQualityUCI.csv",
      delim = ";",
      col_names = TRUE,
      # "T" is a column name (a string), not the logical shorthand `T`.
      default_target = "T",
      null_char = NULL,
      default_task = "regr"
    ),
    list(
      dataset_name = "Relative location of CT slices on axial axis",
      short_name = "ct",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00206/",
      file = "slice_localization_data.zip",
      zipfile = "slice_localization_data.csv",
      delim = "csv",
      col_names = TRUE,
      default_target = NULL,
      null_char = NULL,
      default_task = "regr"
    ),
    list(
      dataset_name = "Appliances energy prediction",
      short_name = "appliances",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00374/",
      file = "energydata_complete.csv",
      zipfile = NULL,
      delim = "csv",
      col_names = TRUE,
      default_target = "Appliances",
      null_char = NULL,
      default_task = "regr"
    ),
    list(
      dataset_name = "Statlog (German Credit Data)",
      short_name = "german",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/",
      file = "german.data",
      zipfile = NULL,
      delim = " ",
      col_names = c(
        "Status_existing_chkg_acct", "Duration_months", "Credit_history",
        "Purpose", "Credit_amount",
        "Savings_acct", "Present_employment_since",
        "Payment_to_disposable_income", "Marital_status",
        "Other_applicants", "Present_residence_since", "Property",
        "Age_years",
        "Other_installment_plans", "Housing", "Existing_credits_this_bank",
        "Job",
        "Number_of_dependents", "Telephone", "Foreign_worker", "Performance"
      ),
      default_target = "Performance",
      null_char = NULL,
      default_task = "classif"
    ),
    list(
      dataset_name = "Statlog (Australian Credit Approval)",
      short_name = "australian",
      url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/australian/",
      file = "australian.dat",
      zipfile = NULL,
      delim = " ",
      col_names = c(
        "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10",
        "A11", "A12", "A13", "A14", "A15"
      ),
      default_target = "A15",
      null_char = NULL,
      default_task = "classif"
    )
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.