Source code for langchain.vectorstores.weaviate

"""Wrapper around weaviate vector database."""
from __future__ import annotations

from typing import Any, Dict, Iterable, List, Optional, Type
from uuid import uuid4

from langchain.docstore.document import Document
from langchain.embeddings.base import Embeddings
from langchain.utils import get_from_dict_or_env
from langchain.vectorstores.base import VectorStore


def _default_schema(index_name: str) -> Dict:
    return {
        "class": index_name,
        "properties": [
            {
                "name": "text",
                "dataType": ["text"],
            }
        ],
    }


[docs]class Weaviate(VectorStore): """Wrapper around Weaviate vector database. To use, you should have the ``weaviate-client`` python package installed. Example: .. code-block:: python import weaviate from langchain.vectorstores import Weaviate client = weaviate.Client(url=os.environ["WEAVIATE_URL"], ...) weaviate = Weaviate(client, index_name, text_key) """ def __init__( self, client: Any, index_name: str, text_key: str, attributes: Optional[List[str]] = None, ): """Initialize with Weaviate client.""" try: import weaviate except ImportError: raise ValueError( "Could not import weaviate python package. " "Please install it with `pip install weaviate-client`." ) if not isinstance(client, weaviate.Client): raise ValueError( f"client should be an instance of weaviate.Client, got {type(client)}" ) self._client = client self._index_name = index_name self._text_key = text_key self._query_attrs = [self._text_key] if attributes is not None: self._query_attrs.extend(attributes)
[docs] def add_texts( self, texts: Iterable[str], metadatas: Optional[List[dict]] = None, **kwargs: Any, ) -> List[str]: """Upload texts with metadata (properties) to Weaviate.""" from weaviate.util import get_valid_uuid with self._client.batch as batch: ids = [] for i, doc in enumerate(texts): data_properties = { self._text_key: doc, } if metadatas is not None: for key in metadatas[i].keys(): data_properties[key] = metadatas[i][key] _id = get_valid_uuid(uuid4()) batch.add_data_object( data_object=data_properties, class_name=self._index_name, uuid=_id ) ids.append(_id) return ids
[docs] def similarity_search_by_vector( self, embedding: List[float], k: int = 4, **kwargs: Any ) -> List[Document]: """Look up similar documents by embedding vector in Weaviate.""" vector = {"vector": embedding} query_obj = self._client.query.get(self._index_name, self._query_attrs) result = query_obj.with_near_vector(vector).with_limit(k).do() if "errors" in result: raise ValueError(f"Error during query: {result['errors']}") docs = [] for res in result["data"]["Get"][self._index_name]: text = res.pop(self._text_key) docs.append(Document(page_content=text, metadata=res)) return docs
[docs] @classmethod def from_texts( cls: Type[Weaviate], texts: List[str], embedding: Embeddings, metadatas: Optional[List[dict]] = None, **kwargs: Any, ) -> Weaviate: """Construct Weaviate wrapper from raw documents. This is a user-friendly interface that: 1. Embeds documents. 2. Creates a new index for the embeddings in the Weaviate instance. 3. Adds the documents to the newly created Weaviate index. This is intended to be a quick way to get started. Example: .. code-block:: python from langchain.vectorstores.weaviate import Weaviate from langchain.embeddings import OpenAIEmbeddings embeddings = OpenAIEmbeddings() weaviate = Weaviate.from_texts( texts, embeddings, weaviate_url="http://localhost:8080" ) """ weaviate_url = get_from_dict_or_env(kwargs, "weaviate_url", "WEAVIATE_URL") try: from weaviate import Client from weaviate.util import get_valid_uuid except ImportError: raise ValueError( "Could not import weaviate python package. " "Please install it with `pip instal weaviate-client`" ) client = Client(weaviate_url) index_name = kwargs.get("index_name", f"LangChain_{uuid4().hex}") embeddings = embedding.embed_documents(texts) if embedding else None text_key = "text" schema = _default_schema(index_name) attributes = list(metadatas[0].keys()) if metadatas else None # check whether the index already exists if not client.schema.contains(schema): client.schema.create_class(schema) with client.batch as batch: for i, text in enumerate(texts): data_properties = { text_key: text, } if metadatas is not None: for key in metadatas[i].keys(): data_properties[key] = metadatas[i][key] _id = get_valid_uuid(uuid4()) # if an embedding strategy is not provided, we let # weaviate create the embedding. Note that this will only # work if weaviate has been installed with a vectorizer module # like text2vec-contextionary for example params = { "uuid": _id, "data_object": data_properties, "class_name": index_name, } if embeddings is not None: params["vector"] = (embeddings[i],) batch.add_data_object(**params) batch.flush() return cls(client, index_name, text_key, attributes)