WeaviateStorage

class WeaviateStorage(BaseVectorStorage):

An implementation of the BaseVectorStorage for interacting with Weaviate, a cloud-native vector search engine.

This class provides multiple ways to connect to Weaviate instances:

  • Weaviate Cloud (WCD)
  • Local Docker/Kubernetes instances
  • Embedded Weaviate
  • Custom connection parameters

Parameters:

  • vector_dim (int): The dimension of the vectors to be stored.
  • collection_name (Optional[str], optional): Name for the collection in Weaviate. If not provided, generates a unique name based on current timestamp. (default: :obj:None)
  • connection_type (ConnectionType, optional): Type of connection to use. Supported types: ‘local’, ‘cloud’, ‘embedded’, ‘custom’. (default: :obj:"local")
  Weaviate Cloud parameters:
  • wcd_cluster_url (Optional[str], optional): Weaviate Cloud cluster URL. Required when connection_type=‘cloud’.
  • wcd_api_key (Optional[str], optional): Weaviate Cloud API key. Required when connection_type=‘cloud’.
  Local instance parameters:
  • local_host (str, optional): Local Weaviate host. (default: :obj:"localhost")
  • local_port (int, optional): Local Weaviate HTTP port. (default: :obj:8080)
  • local_grpc_port (int, optional): Local Weaviate gRPC port. (default: :obj:50051)
  • local_auth_credentials (Optional[Union[str, Any]], optional): Authentication credentials for local instance. Can be an API key string or Auth object. (default: :obj:None)
  Embedded Weaviate parameters:
  • embedded_hostname (str, optional): Embedded instance hostname. (default: :obj:"127.0.0.1")
  • embedded_port (int, optional): Embedded instance HTTP port. (default: :obj:8079)
  • embedded_grpc_port (int, optional): Embedded instance gRPC port. (default: :obj:50050)
  • embedded_version (Optional[str], optional): Weaviate version for embedded instance. If None, uses the default version. (default: :obj:None)
  • embedded_persistence_data_path (Optional[str], optional): Directory for embedded database files. (default: :obj:None)
  • embedded_binary_path (Optional[str], optional): Directory for Weaviate binary. (default: :obj:None)
  • embedded_environment_variables (Optional[Dict[str, str]], optional): Environment variables for embedded instance. (default: :obj:None)
  Custom connection parameters:
  • custom_http_host (Optional[str], optional): Custom HTTP host.
  • custom_http_port (Optional[int], optional): Custom HTTP port.
  • custom_http_secure (Optional[bool], optional): Use HTTPS.
  • custom_grpc_host (Optional[str], optional): Custom gRPC host.
  • custom_grpc_port (Optional[int], optional): Custom gRPC port.
  • custom_grpc_secure (Optional[bool], optional): Use secure gRPC.
  • custom_auth_credentials (Optional[Any], optional): Custom auth.
  Vector index configuration parameters:
  • vector_index_type (VectorIndexType, optional): Vector index type. Supported types: ‘hnsw’, ‘flat’. (default: :obj:"hnsw")
  • distance_metric (DistanceMetric, optional): Distance metric for vector similarity. Supported metrics: ‘cosine’, ‘dot’, ‘l2-squared’, ‘hamming’, ‘manhattan’. (default: :obj:"cosine")
  Common parameters for all connection types:
  • headers (Optional[Dict[str, str]], optional): Additional headers for third-party API keys (e.g., OpenAI, Cohere). (default: :obj:None)
  • additional_config (Optional[Any], optional): Advanced configuration options like timeouts. (default: :obj:None)
  • skip_init_checks (bool, optional): Skip initialization checks. (default: :obj:False)

__init__

def __init__(
    self,
    vector_dim: int,
    collection_name: Optional[str] = None,
    connection_type: ConnectionType = 'local',
    wcd_cluster_url: Optional[str] = None,
    wcd_api_key: Optional[str] = None,
    local_host: str = 'localhost',
    local_port: int = 8080,
    local_grpc_port: int = 50051,
    local_auth_credentials: Optional[Union[str, Any]] = None,
    embedded_hostname: str = '127.0.0.1',
    embedded_port: int = 8079,
    embedded_grpc_port: int = 50050,
    embedded_version: Optional[str] = None,
    embedded_persistence_data_path: Optional[str] = None,
    embedded_binary_path: Optional[str] = None,
    embedded_environment_variables: Optional[Dict[str, str]] = None,
    custom_http_host: Optional[str] = None,
    custom_http_port: Optional[int] = None,
    custom_http_secure: Optional[bool] = None,
    custom_grpc_host: Optional[str] = None,
    custom_grpc_port: Optional[int] = None,
    custom_grpc_secure: Optional[bool] = None,
    custom_auth_credentials: Optional[Any] = None,
    vector_index_type: VectorIndexType = 'hnsw',
    distance_metric: DistanceMetric = 'cosine',
    headers: Optional[Dict[str, str]] = None,
    additional_config: Optional[Any] = None,
    skip_init_checks: bool = False,
    **kwargs: Any
):

_get_connection_client

def _get_connection_client(self):

Get Weaviate client based on connection type and user settings.

_create_cloud_client

def _create_cloud_client(self, weaviate_module: Any):

Create a Weaviate Cloud client.

_create_local_client

def _create_local_client(self, weaviate_module: Any):

Create a local Weaviate client.

_create_embedded_client

def _create_embedded_client(self, weaviate_module: Any):

Create an embedded Weaviate client.

_create_custom_client

def _create_custom_client(self, weaviate_module: Any):

Create a custom Weaviate client.

__del__

def __del__(self):

Clean up client connection.

close

def close(self):

Explicitly close the client connection.

_generate_collection_name

def _generate_collection_name(self):

Generate a collection name if the user doesn’t provide one.

_check_and_create_collection

def _check_and_create_collection(self, **kwargs: Any):

Check if the collection exists and create it if it doesn’t.

_collection_exists

def _collection_exists(self, collection_name: str):

Check if the collection exists.

_get_vector_index_config

def _get_vector_index_config(self, **kwargs: Any):

Get vector index configuration based on user settings.

_create_collection

def _create_collection(self, **kwargs: Any):

Create a new collection in Weaviate.

add

def add(self, records: List[VectorRecord], **kwargs: Any):

Saves a list of vector records to the storage.

Parameters:

  • records (List[VectorRecord]): List of vector records to be saved.
  • **kwargs (Any): Additional keyword arguments.

delete

def delete(self, ids: List[str], **kwargs: Any):

Deletes a list of vectors identified by their IDs from the storage.

Parameters:

  • ids (List[str]): List of unique identifiers for the vectors to be deleted.
  • **kwargs (Any): Additional keyword arguments.

_calculate_similarity_from_distance

def _calculate_similarity_from_distance(self, distance: Optional[float]):

Calculate similarity score based on distance metric.

Parameters:

  • distance (Optional[float]): The distance value from Weaviate.

Returns:

float: Normalized similarity score between 0 and 1.

status

def status(self):

Returns:

VectorDBStatus: The vector database status.

query

def query(self, query: VectorDBQuery, **kwargs: Any):

Searches for similar vectors in the storage based on the provided query.

Parameters:

  • query (VectorDBQuery): The query object containing the search vector and the number of top similar vectors to retrieve.
  • **kwargs (Any): Additional keyword arguments.

Returns:

List[VectorDBQueryResult]: A list of vectors retrieved from the storage based on similarity to the query vector.

clear

def clear(self):

Remove all vectors from the storage.

load

def load(self):

Load the collection hosted on the cloud service.

client

def client(self):

Provides access to the underlying vector database client.