Dataset viewer documentation
Get dataset information
Get Started
Guides
Check dataset validity
List splits and subsets
Get dataset information
Preview a dataset
Download slices of rows
Search text in a dataset
Filter rows in a dataset
List Parquet files
Get the number of rows and the bytes size
Explore dataset statistics
Get Croissant metadata
Query datasets from dataset viewer API
Conceptual Guides
Get dataset information
The dataset viewer provides an /info endpoint for exploring general information about a dataset, including fields such as description, citation, homepage, license, and features.
The /info endpoint accepts two query parameters:
dataset: the dataset name
config: the subset name
Python
JavaScript
cURL
import requests
# API_TOKEN is not defined in this snippet; it must be set before this line runs.
headers = {"Authorization": f"Bearer {API_TOKEN}"}
# /info endpoint URL with the `dataset` and `config` query parameters filled in.
API_URL = "https://datasets-server.huggingface.co/info?dataset=ibm/duorc&config=SelfRC"
def query():
    """Send a GET request to ``API_URL`` with ``headers`` and return the decoded JSON body."""
    response = requests.get(API_URL, headers=headers)
    return response.json()
data = query()

The endpoint response is a JSON object with the dataset_info key. Its structure and content correspond to the DatasetInfo object of the datasets library.
{
"dataset_info": {
"description": "",
"citation": "",
"homepage": "",
"license": "",
"features": {
"plot_id": { "dtype": "string", "_type": "Value" },
"plot": { "dtype": "string", "_type": "Value" },
"title": { "dtype": "string", "_type": "Value" },
"question_id": { "dtype": "string", "_type": "Value" },
"question": { "dtype": "string", "_type": "Value" },
"answers": {
"feature": { "dtype": "string", "_type": "Value" },
"_type": "List"
},
"no_answer": { "dtype": "bool", "_type": "Value" }
},
"builder_name": "parquet",
"dataset_name": "duorc",
"config_name": "SelfRC",
"version": { "version_str": "0.0.0", "major": 0, "minor": 0, "patch": 0 },
"splits": {
"train": {
"name": "train",
"num_bytes": 248966361,
"num_examples": 60721,
"dataset_name": null
},
"validation": {
"name": "validation",
"num_bytes": 56359392,
"num_examples": 12961,
"dataset_name": null
},
"test": {
"name": "test",
"num_bytes": 51022318,
"num_examples": 12559,
"dataset_name": null
}
},
"download_size": 21001846,
"dataset_size": 356348071
},
"partial": false
}