module aixplain.factories.dataset_factory
class DatasetFactory
A static class for creating and exploring Dataset Objects.
Attributes:
backend_url
(str): The URL for the backend.
classmethod create
create(
name: str,
description: str,
license: License,
function: Function,
input_schema: List[Union[Dict, MetaData]],
output_schema: List[Union[Dict, MetaData]] = [],
hypotheses_schema: List[Union[Dict, MetaData]] = [],
metadata_schema: List[Union[Dict, MetaData]] = [],
content_path: Union[str, Path, List[Union[str, Path]]] = [],
input_ref_data: Dict[str, Any] = {},
output_ref_data: Dict[str, List[Any]] = {},
hypotheses_ref_data: Dict[str, Any] = {},
meta_ref_data: Dict[str, Any] = {},
tags: List[str] = [],
privacy: Privacy = <Privacy.PRIVATE: 'Private'>,
split_labels: Optional[List[str]] = None,
split_rate: Optional[List[float]] = None,
error_handler: ErrorHandler = <ErrorHandler.SKIP: 'skip'>,
s3_link: Optional[str] = None,
aws_credentials: Optional[Dict[str, str]] = {'AWS_ACCESS_KEY_ID': None, 'AWS_SECRET_ACCESS_KEY': None},
api_key: Optional[str] = None
) → Dict
Dataset Onboard
Args:
name
(Text): dataset name
description
(Text): dataset description
license
(License): dataset license
function
(Function): dataset function
input_schema
(List[Union[Dict, MetaData]]): metadata of inputs
output_schema
(List[Union[Dict, MetaData]]): metadata of outputs
hypotheses_schema
(List[Union[Dict, MetaData]], optional): schema of the hypotheses to the references. Defaults to [].
metadata_schema
(List[Union[Dict, MetaData]], optional): metadata of metadata information of the dataset. Defaults to [].
content_path
(Union[Union[Text, Path], List[Union[Text, Path]]]): path to files which contain the data content
input_ref_data
(Dict[Text, Any], optional): reference to input data which is already in the platform. Defaults to {}.
output_ref_data
(Dict[Text, List[Any]], optional): reference to output data which is already in the platform. Defaults to {}.
hypotheses_ref_data
(Dict[Text, Any], optional): hypotheses which are already in the platform. Defaults to {}.
meta_ref_data
(Dict[Text, Any], optional): metadata which is already in the platform. Defaults to {}.
tags
(List[Text], optional): datasets description tags. Defaults to [].
privacy
(Privacy, optional): dataset privacy. Defaults to Privacy.PRIVATE.
error_handler
(ErrorHandler, optional): how to handle failed rows in the data asset. Defaults to ErrorHandler.SKIP.
s3_link
(Optional[Text]): s3 url to files or directories
aws_credentials
(Optional[Dict[Text, Text]]): credentials for AWS; it should contain these two keys: AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
api_key
(Optional[Text]): team api key. Defaults to None.
Returns:
Dict
: dataset onboard status
classmethod get
get(dataset_id: str) → Dataset
Create a 'Dataset' object from dataset id
Args:
dataset_id
(Text): Dataset ID of required dataset.
Returns:
Dataset
: Created 'Dataset' object
classmethod list
list(
query: Optional[str] = None,
function: Optional[Function] = None,
source_languages: Optional[Union[Language, List[Language]]] = None,
target_languages: Optional[Union[Language, List[Language]]] = None,
data_type: Optional[DataType] = None,
license: Optional[License] = None,
is_referenceless: Optional[bool] = None,
page_number: int = 0,
page_size: int = 20
) → Dict
Listing Datasets
Args:
query
(Optional[Text], optional): search query. Defaults to None.
function
(Optional[Function], optional): function filter. Defaults to None.
source_languages
(Optional[Union[Language, List[Language]]], optional): language filter of input data. Defaults to None.
target_languages
(Optional[Union[Language, List[Language]]], optional): language filter of output data. Defaults to None.
data_type
(Optional[DataType], optional): data type filter. Defaults to None.
license
(Optional[License], optional): license filter. Defaults to None.
is_referenceless
(Optional[bool], optional): has reference filter. Defaults to None.
page_number
(int, optional): page number. Defaults to 0.
page_size
(int, optional): page size. Defaults to 20.
Returns:
Dict
: list of datasets in agreement with the filters, page number, page total and total elements