aixplain.modules.model.index_model
IndexFilterOperator Objects
class IndexFilterOperator(Enum)
Enumeration of operators available for filtering index records.
This enum defines the comparison operators that can be used when creating filters for searching and retrieving records from an index.
Attributes:
EQUALS (str) - Equality operator ("==")
NOT_EQUALS (str) - Inequality operator ("!=")
CONTAINS (str) - Membership test operator ("in")
NOT_CONTAINS (str) - Negative membership test operator ("not in")
GREATER_THAN (str) - Greater than operator (">")
LESS_THAN (str) - Less than operator ("<")
GREATER_THAN_OR_EQUALS (str) - Greater than or equal to operator (">=")
LESS_THAN_OR_EQUALS (str) - Less than or equal to operator ("<=")
IndexFilter Objects
class IndexFilter()
A class representing a filter for querying index records.
This class defines a filter that can be used to search or retrieve records from an index based on specific field values and comparison operators.
Attributes:
field (str) - The name of the field to filter on.
value (str) - The value to compare against.
operator (Union[IndexFilterOperator, str]) - The comparison operator to use.
__init__
def __init__(field: str, value: str, operator: Union[IndexFilterOperator,
str])
Initialize a new IndexFilter instance.
Arguments:
field (str) - The name of the field to filter on.
value (str) - The value to compare against.
operator (Union[IndexFilterOperator, str]) - The comparison operator to use.
to_dict
def to_dict()
Convert the filter to a dictionary representation.
Returns:
dict - A dictionary containing the filter's field, value, and operator. The operator is converted to its string value if it is an IndexFilterOperator.
Splitter Objects
class Splitter()
A class for configuring how documents should be split during indexing.
This class provides options for splitting documents into smaller chunks before they are indexed, which can be useful for large documents or for specific search requirements.
Attributes:
split (bool) - Whether to split the documents or not.
split_by (SplittingOptions) - The method to use for splitting (e.g., by word, sentence).
split_length (int) - The length of each split chunk.
split_overlap (int) - The number of overlapping units between consecutive chunks.
__init__
def __init__(split: bool = False,
split_by: SplittingOptions = SplittingOptions.WORD,
split_length: int = 1,
split_overlap: int = 0)
Initialize a new Splitter instance.
Arguments:
split (bool, optional) - Whether to split the documents. Defaults to False.
split_by (SplittingOptions, optional) - The method to use for splitting. Defaults to SplittingOptions.WORD.
split_length (int, optional) - The length of each split chunk. Defaults to 1.
split_overlap (int, optional) - The number of overlapping units between consecutive chunks. Defaults to 0.
IndexModel Objects
class IndexModel(Model)
__init__
def __init__(id: Text,
name: Text,
description: Text = "",
api_key: Optional[Text] = None,
supplier: Union[Dict, Text, Supplier, int] = "aiXplain",
version: Optional[Text] = None,
function: Optional[Function] = None,
is_subscribed: bool = False,
cost: Optional[Dict] = None,
embedding_model: Union[EmbeddingModel, str] = None,
function_type: Optional[FunctionType] = FunctionType.SEARCH,
**additional_info) -> None
Initialize a new IndexModel instance.
Arguments:
id (Text) - ID of the Index Model.
name (Text) - Name of the Index Model.
description (Text, optional) - Description of the Index Model. Defaults to "".
api_key (Text, optional) - API key of the Index Model. Defaults to None.
supplier (Union[Dict, Text, Supplier, int], optional) - Supplier of the Index Model. Defaults to "aiXplain".
version (Text, optional) - Version of the Index Model. Defaults to None.
function (Function, optional) - Function of the Index Model. Must be Function.SEARCH.
is_subscribed (bool, optional) - Whether the user is subscribed. Defaults to False.
cost (Dict, optional) - Cost of the Index Model. Defaults to None.
embedding_model (Union[EmbeddingModel, str], optional) - Model used for embedding documents. Defaults to None.
function_type (FunctionType, optional) - Type of the function. Defaults to FunctionType.SEARCH.
**additional_info - Any additional Index Model info to be saved.
Raises:
Exception - If function is not Function.SEARCH.
to_dict
def to_dict() -> Dict
Convert the IndexModel instance to a dictionary representation.
Returns:
Dict - A dictionary containing the model's attributes, including:
- All attributes from the parent Model class
- embedding_model: The model used for embedding documents
- embedding_size: The size of the embeddings produced
- collection_type: The type of collection derived from the version
search
def search(query: str,
top_k: int = 10,
filters: List[IndexFilter] = []) -> ModelResponse
Search for documents in the index.
Arguments:
query (str) - Query to be searched.
top_k (int, optional) - Number of results to be returned. Defaults to 10.
filters (List[IndexFilter], optional) - Filters to be applied. Defaults to [].
Returns:
ModelResponse - Response from the indexing service.
Example:
- index_model.search("Hello")
- index_model.search("", filters=[IndexFilter(field="category", value="animate", operator=IndexFilterOperator.EQUALS)])
upsert
def upsert(documents: Union[List[Record], str],
splitter: Optional[Splitter] = None) -> ModelResponse
Upsert documents into the index.
Arguments:
documents (Union[List[Record], str]) - List of documents to be upserted, or a file path.
splitter (Splitter, optional) - Splitter to be applied. Defaults to None.
Returns:
ModelResponse - Response from the indexing service.
Examples:
index_model.upsert([Record(value="Hello, world!", value_type="text", uri="", id="1", attributes={})])
index_model.upsert([Record(value="Hello, world!", value_type="text", uri="", id="1", attributes={})], splitter=Splitter(split=True, split_by=SplittingOptions.WORD, split_length=1, split_overlap=0))
index_model.upsert("my_file.pdf")
index_model.upsert("my_file.pdf", splitter=Splitter(split=True, split_by=SplittingOptions.WORD, split_length=400, split_overlap=50))
The splitter in the examples above is optional and can be used to split the documents into smaller chunks.
count
def count() -> int
Get the total number of documents in the index.
Returns:
int - The number of documents in the index.
Raises:
Exception - If the count operation fails.
Example:
>>> index_model.count()
42
get_record
def get_record(record_id: Text) -> ModelResponse
Get a document from the index.
Arguments:
record_id (Text) - ID of the document to retrieve.
Returns:
ModelResponse - Response containing the retrieved document data.
Raises:
Exception - If document retrieval fails.
Example:
>>> index_model.get_record("123")
delete_record
def delete_record(record_id: Text) -> ModelResponse
Delete a document from the index.
Arguments:
record_id (Text) - ID of the document to delete.
Returns:
ModelResponse - Response containing the deleted document data.
Raises:
Exception - If document deletion fails.
Example:
>>> index_model.delete_record("123")
prepare_record_from_file
def prepare_record_from_file(file_path: str, file_id: str = None) -> Record
Prepare a record from a file.
Arguments:
file_path (str) - The path to the file to be processed.
file_id (str, optional) - The ID to assign to the record. If not provided, a unique ID is generated.
Returns:
Record - A Record object containing the file's content and metadata.
Raises:
Exception - If the file cannot be parsed.
Example:
>>> record = index_model.prepare_record_from_file("/path/to/file.txt")
parse_file
@staticmethod
def parse_file(file_path: str) -> ModelResponse
Parse a file using the Docling model.
Arguments:
file_path (str) - The path to the file to be parsed.
Returns:
ModelResponse - The response containing the parsed file content.
Raises:
Exception - If the file does not exist or cannot be parsed.
Example:
>>> response = IndexModel.parse_file("/path/to/file.pdf")
retrieve_records_with_filter
def retrieve_records_with_filter(filter: IndexFilter) -> ModelResponse
Retrieve records from the index that match the given filter.
Arguments:
filter (IndexFilter) - The filter criteria to apply when retrieving records.
Returns:
ModelResponse - Response containing the retrieved records.
Raises:
Exception - If retrieval fails.
Example:
>>> from aixplain.modules.model.index_model import IndexFilter, IndexFilterOperator
>>> my_filter = IndexFilter(field="category", value="world", operator=IndexFilterOperator.EQUALS)
>>> index_model.retrieve_records_with_filter(my_filter)
delete_records_by_date
def delete_records_by_date(date: float) -> ModelResponse
Delete records from the index that match the given date.
Arguments:
date (float) - The date (as a timestamp) to match records for deletion.
Returns:
ModelResponse - Response containing the result of the deletion operation.
Raises:
Exception - If deletion fails.
Example:
>>> index_model.delete_records_by_date(1717708800)