camel.datahubs package

On this page

camel.datahubs package#

Submodules#

camel.datahubs.base module#

class camel.datahubs.base.BaseDatasetManager[source]#

Bases: ABC

Abstract base class for dataset managers.

abstract add_records(dataset_name: str, records: List[Record], filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Adds records to a dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • records (List[Record]) – A list of records to add to the dataset.

  • filepath (str) – The path to the file containing the records. (default: "records/records.json")

  • kwargs (Any) – Additional keyword arguments.

abstract create_dataset(name: str, **kwargs: Any) str[source]#

Creates a new dataset.

Parameters:
  • name (str) – The name of the dataset.

  • kwargs (Any) – Additional keyword arguments.

Returns:

The URL of the created dataset.

Return type:

str

abstract delete_dataset(dataset_name: str, **kwargs: Any) None[source]#

Deletes a dataset.

Parameters:
  • dataset_name (str) – The name of the dataset to delete.

  • kwargs (Any) – Additional keyword arguments.

abstract delete_record(dataset_name: str, record_id: str, filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Deletes a record from the dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • record_id (str) – The ID of the record to delete.

  • filepath (str) – The path to the file containing the records. (default: "records/records.json")

  • kwargs (Any) – Additional keyword arguments.

abstract list_datasets(username: str, limit: int = 100, **kwargs: Any) List[str][source]#

Lists the datasets belonging to the specified user.

Parameters:
  • username (str) – The username of the user whose datasets to list.

  • limit (int) – The maximum number of datasets to list. (default: 100)

  • kwargs (Any) – Additional keyword arguments.

Returns:

A list of dataset ids.

Return type:

List[str]

abstract list_records(dataset_name: str, filepath: str = 'records/records.json', **kwargs: Any) List[Record][source]#

Lists records in a dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • filepath (str) – The path to the file containing the records. (default: "records/records.json")

  • kwargs (Any) – Additional keyword arguments.

abstract update_records(dataset_name: str, records: List[Record], filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Updates records in a dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • records (List[Record]) – A list of records to update in the dataset.

  • filepath (str) – The path to the file containing the records. (default: "records/records.json")

  • kwargs (Any) – Additional keyword arguments.

camel.datahubs.huggingface module#

class camel.datahubs.huggingface.HuggingFaceDatasetManager(token: str | None = None)[source]#

Bases: BaseDatasetManager

A dataset manager for Hugging Face datasets. This class provides methods to create, add, update, delete, and list records in a dataset on the Hugging Face Hub.

Parameters:

token (str) – The Hugging Face API token. If not provided, the token will be read from the environment variable HF_TOKEN.

add_records(dataset_name: str, records: List[Record], filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Adds records to a dataset on the Hugging Face Hub.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • records (List[Record]) – A list of records to add to the dataset.

  • filepath (str) – The path to the file containing the records.

  • kwargs (Any) – Additional keyword arguments.

Raises:

ValueError – If the dataset already has a records file.

create_dataset(name: str, private: bool = False, **kwargs: Any) str[source]#

Creates a new dataset on the Hugging Face Hub.

Parameters:
  • name (str) – The name of the dataset.

  • private (bool) – Whether the dataset should be private. (default: False)

  • kwargs (Any) – Additional keyword arguments.

Returns:

The URL of the created dataset.

Return type:

str

create_dataset_card(dataset_name: str, description: str, license: str | None = None, version: str | None = None, tags: List[str] | None = None, authors: List[str] | None = None, size_category: List[str] | None = None, language: List[str] | None = None, task_categories: List[str] | None = None, content: str | None = None) None[source]#
Creates and uploads a dataset card to the Hugging Face Hub in YAML format.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • description (str) – A description of the dataset.

  • license (str) – The license of the dataset. (default: None)

  • version (str) – The version of the dataset. (default: None)

  • tags (list) – A list of tags for the dataset. (default: None)

  • authors (list) – A list of authors of the dataset. (default: None)

  • size_category (list) – A size category for the dataset. (default: None)

  • language (list) – A list of languages the dataset is in. (default: None)

  • task_categories (list) – A list of task categories. (default: None)

  • content (str) – Custom markdown content that the user wants to add to the dataset card. (default: None)

delete_dataset(dataset_name: str, **kwargs: Any) None[source]#

Deletes a dataset from the Hugging Face Hub.

Parameters:
  • dataset_name (str) – The name of the dataset to delete.

  • kwargs (Any) – Additional keyword arguments.

delete_record(dataset_name: str, record_id: str, filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Deletes a record from the dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • record_id (str) – The ID of the record to delete.

  • filepath (str) – The path to the file containing the records.

  • kwargs (Any) – Additional keyword arguments.

Raises:

ValueError – If the dataset does not have an existing file to delete records from.

list_datasets(username: str, limit: int = 100, **kwargs: Any) List[str][source]#

Lists the datasets belonging to the specified user.

Parameters:
  • username (str) – The username of the user whose datasets to list.

  • limit (int) – The maximum number of datasets to list. (default: 100)

  • kwargs (Any) – Additional keyword arguments.

Returns:

A list of dataset ids.

Return type:

List[str]

list_records(dataset_name: str, filepath: str = 'records/records.json', **kwargs: Any) List[Record][source]#

Lists all records in a dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • filepath (str) – The path to the file containing the records.

  • kwargs (Any) – Additional keyword arguments.

Returns:

A list of records in the dataset.

Return type:

List[Record]

update_records(dataset_name: str, records: List[Record], filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Updates records in a dataset on the Hugging Face Hub.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • records (List[Record]) – A list of records to update in the dataset.

  • filepath (str) – The path to the file containing the records.

  • kwargs (Any) – Additional keyword arguments.

Raises:

ValueError – If the dataset does not have an existing file to update records in.

camel.datahubs.models module#

class camel.datahubs.models.Record(*, id: str | None = None, metadata: Dict[str, Any] | None = None, content: Dict[str, Any] | None = None, **extra_data: Any)[source]#

Bases: BaseModel

content: Dict[str, Any] | None#
id: str | None#
metadata: Dict[str, Any] | None#
model_computed_fields: ClassVar[Dict[str, ComputedFieldInfo]] = {}#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

model_config: ClassVar[ConfigDict] = {'extra': 'allow'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

model_fields: ClassVar[Dict[str, FieldInfo]] = {'content': FieldInfo(annotation=Union[Dict[str, Any], NoneType], required=False, default=None), 'id': FieldInfo(annotation=Union[str, NoneType], required=False, default=None), 'metadata': FieldInfo(annotation=Union[Dict[str, Any], NoneType], required=False, default=None)}#

Metadata about the fields defined on the model: a mapping of field names to pydantic.fields.FieldInfo objects.

This replaces Model.__fields__ from Pydantic V1.

Module contents#

class camel.datahubs.BaseDatasetManager[source]#

Bases: ABC

Abstract base class for dataset managers.

abstract add_records(dataset_name: str, records: List[Record], filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Adds records to a dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • records (List[Record]) – A list of records to add to the dataset.

  • filepath (str) – The path to the file containing the records. (default: "records/records.json")

  • kwargs (Any) – Additional keyword arguments.

abstract create_dataset(name: str, **kwargs: Any) str[source]#

Creates a new dataset.

Parameters:
  • name (str) – The name of the dataset.

  • kwargs (Any) – Additional keyword arguments.

Returns:

The URL of the created dataset.

Return type:

str

abstract delete_dataset(dataset_name: str, **kwargs: Any) None[source]#

Deletes a dataset.

Parameters:
  • dataset_name (str) – The name of the dataset to delete.

  • kwargs (Any) – Additional keyword arguments.

abstract delete_record(dataset_name: str, record_id: str, filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Deletes a record from the dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • record_id (str) – The ID of the record to delete.

  • filepath (str) – The path to the file containing the records. (default: "records/records.json")

  • kwargs (Any) – Additional keyword arguments.

abstract list_datasets(username: str, limit: int = 100, **kwargs: Any) List[str][source]#

Lists the datasets belonging to the specified user.

Parameters:
  • username (str) – The username of the user whose datasets to list.

  • limit (int) – The maximum number of datasets to list. (default: 100)

  • kwargs (Any) – Additional keyword arguments.

Returns:

A list of dataset ids.

Return type:

List[str]

abstract list_records(dataset_name: str, filepath: str = 'records/records.json', **kwargs: Any) List[Record][source]#

Lists records in a dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • filepath (str) – The path to the file containing the records. (default: "records/records.json")

  • kwargs (Any) – Additional keyword arguments.

abstract update_records(dataset_name: str, records: List[Record], filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Updates records in a dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • records (List[Record]) – A list of records to update in the dataset.

  • filepath (str) – The path to the file containing the records. (default: "records/records.json")

  • kwargs (Any) – Additional keyword arguments.

class camel.datahubs.HuggingFaceDatasetManager(token: str | None = None)[source]#

Bases: BaseDatasetManager

A dataset manager for Hugging Face datasets. This class provides methods to create, add, update, delete, and list records in a dataset on the Hugging Face Hub.

Parameters:

token (str) – The Hugging Face API token. If not provided, the token will be read from the environment variable HF_TOKEN.

add_records(dataset_name: str, records: List[Record], filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Adds records to a dataset on the Hugging Face Hub.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • records (List[Record]) – A list of records to add to the dataset.

  • filepath (str) – The path to the file containing the records.

  • kwargs (Any) – Additional keyword arguments.

Raises:

ValueError – If the dataset already has a records file.

create_dataset(name: str, private: bool = False, **kwargs: Any) str[source]#

Creates a new dataset on the Hugging Face Hub.

Parameters:
  • name (str) – The name of the dataset.

  • private (bool) – Whether the dataset should be private. (default: False)

  • kwargs (Any) – Additional keyword arguments.

Returns:

The URL of the created dataset.

Return type:

str

create_dataset_card(dataset_name: str, description: str, license: str | None = None, version: str | None = None, tags: List[str] | None = None, authors: List[str] | None = None, size_category: List[str] | None = None, language: List[str] | None = None, task_categories: List[str] | None = None, content: str | None = None) None[source]#
Creates and uploads a dataset card to the Hugging Face Hub in YAML format.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • description (str) – A description of the dataset.

  • license (str) – The license of the dataset. (default: None)

  • version (str) – The version of the dataset. (default: None)

  • tags (list) – A list of tags for the dataset. (default: None)

  • authors (list) – A list of authors of the dataset. (default: None)

  • size_category (list) – A size category for the dataset. (default: None)

  • language (list) – A list of languages the dataset is in. (default: None)

  • task_categories (list) – A list of task categories. (default: None)

  • content (str) – Custom markdown content that the user wants to add to the dataset card. (default: None)

delete_dataset(dataset_name: str, **kwargs: Any) None[source]#

Deletes a dataset from the Hugging Face Hub.

Parameters:
  • dataset_name (str) – The name of the dataset to delete.

  • kwargs (Any) – Additional keyword arguments.

delete_record(dataset_name: str, record_id: str, filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Deletes a record from the dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • record_id (str) – The ID of the record to delete.

  • filepath (str) – The path to the file containing the records.

  • kwargs (Any) – Additional keyword arguments.

Raises:

ValueError – If the dataset does not have an existing file to delete records from.

list_datasets(username: str, limit: int = 100, **kwargs: Any) List[str][source]#

Lists the datasets belonging to the specified user.

Parameters:
  • username (str) – The username of the user whose datasets to list.

  • limit (int) – The maximum number of datasets to list. (default: 100)

  • kwargs (Any) – Additional keyword arguments.

Returns:

A list of dataset ids.

Return type:

List[str]

list_records(dataset_name: str, filepath: str = 'records/records.json', **kwargs: Any) List[Record][source]#

Lists all records in a dataset.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • filepath (str) – The path to the file containing the records.

  • kwargs (Any) – Additional keyword arguments.

Returns:

A list of records in the dataset.

Return type:

List[Record]

update_records(dataset_name: str, records: List[Record], filepath: str = 'records/records.json', **kwargs: Any) None[source]#

Updates records in a dataset on the Hugging Face Hub.

Parameters:
  • dataset_name (str) – The name of the dataset.

  • records (List[Record]) – A list of records to update in the dataset.

  • filepath (str) – The path to the file containing the records.

  • kwargs (Any) – Additional keyword arguments.

Raises:

ValueError – If the dataset does not have an existing file to update records in.

class camel.datahubs.Record(*, id: str | None = None, metadata: Dict[str, Any] | None = None, content: Dict[str, Any] | None = None, **extra_data: Any)[source]#

Bases: BaseModel

content: Dict[str, Any] | None#
id: str | None#
metadata: Dict[str, Any] | None#
model_computed_fields: ClassVar[Dict[str, ComputedFieldInfo]] = {}#

A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

model_config: ClassVar[ConfigDict] = {'extra': 'allow'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

model_fields: ClassVar[Dict[str, FieldInfo]] = {'content': FieldInfo(annotation=Union[Dict[str, Any], NoneType], required=False, default=None), 'id': FieldInfo(annotation=Union[str, NoneType], required=False, default=None), 'metadata': FieldInfo(annotation=Union[Dict[str, Any], NoneType], required=False, default=None)}#

Metadata about the fields defined on the model: a mapping of field names to pydantic.fields.FieldInfo objects.

This replaces Model.__fields__ from Pydantic V1.