Skip to content

Weaviate

WeaviateDocumentRetriever

Bases: Retriever, WeaviateRetrieverVectorStoreParams

Document Retriever using Weaviate.

This class implements a document retriever that uses Weaviate as the vector store backend.

Parameters:

Name Type Description Default
vector_store WeaviateVectorStore

An instance of WeaviateVectorStore to interface with Weaviate vectors.

None
filters dict[str, Any]

Filters to apply for retrieving specific documents. Defaults to None.

None
top_k int

The maximum number of documents to return. Defaults to 10.

10

Attributes:

Name Type Description
group Literal[NodeGroup.RETRIEVERS]

The group of the node.

name str

The name of the node.

vector_store WeaviateVectorStore | None

The WeaviateVectorStore instance.

filters dict[str, Any] | None

Filters for document retrieval.

top_k int

The maximum number of documents to return.

document_retriever WeaviateDocumentRetrieverComponent | None

The document retriever component.

Source code in dynamiq/nodes/retrievers/weaviate.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
class WeaviateDocumentRetriever(Retriever, WeaviateRetrieverVectorStoreParams):
    """Retriever node that fetches documents from a Weaviate vector store.

    The node wraps a ``WeaviateDocumentRetrieverComponent`` and forwards the
    query embedding (plus optional query text, filters, ``top_k``,
    ``content_key`` and ``alpha``) to it.

    Args:
        vector_store (WeaviateVectorStore, optional): An instance of WeaviateVectorStore to interface
            with Weaviate vectors.
        connection (Weaviate, optional): Connection to use. When neither ``vector_store`` nor
            ``connection`` is supplied, a default ``Weaviate()`` connection is created.
        filters (dict[str, Any], optional): Filters to apply for retrieving specific documents.
            Defaults to None.
        top_k (int, optional): The maximum number of documents to return. Defaults to 10.

    Attributes:
        group (Literal[NodeGroup.RETRIEVERS]): The group of the node.
        name (str): The name of the node.
        connection (Weaviate | None): The Weaviate connection.
        vector_store (WeaviateVectorStore | None): The WeaviateVectorStore instance.
        filters (dict[str, Any] | None): Filters for document retrieval.
        top_k (int): The maximum number of documents to return.
        document_retriever (WeaviateDocumentRetrieverComponent | None): The document retriever
            component; stays None until ``init_components`` creates it.
    """

    name: str = "WeaviateDocumentRetriever"
    connection: Weaviate | None = None
    vector_store: WeaviateVectorStore | None = None
    document_retriever: WeaviateDocumentRetrieverComponent | None = None

    def __init__(self, **kwargs):
        """
        Build the retriever, falling back to a default Weaviate connection.

        A default ``Weaviate()`` connection is injected only when both
        ``vector_store`` and ``connection`` are missing from kwargs (or None).

        Args:
            **kwargs: Keyword arguments forwarded to the parent initializer.
        """
        store_given = kwargs.get("vector_store") is not None
        if not store_given and kwargs.get("connection") is None:
            kwargs["connection"] = Weaviate()
        super().__init__(**kwargs)

    @property
    def vector_store_cls(self):
        """Vector store class associated with this retriever."""
        return WeaviateVectorStore

    @property
    def vector_store_params(self):
        """Parameters for the vector store.

        Dumps the fields declared on ``WeaviateRetrieverVectorStoreParams`` and
        adds the node's ``connection`` and ``client``.
        """
        store_field_names = set(WeaviateRetrieverVectorStoreParams.model_fields)
        store_params = self.model_dump(include=store_field_names)
        store_params["connection"] = self.connection
        # NOTE(review): `client` is not declared in this class - presumably provided by a base class.
        store_params["client"] = self.client
        return store_params

    def init_components(self, connection_manager: ConnectionManager | None = None):
        """
        Initialize the components of the retriever.

        Delegates to the parent implementation first, then creates the document
        retriever component unless one is already set.

        Args:
            connection_manager (ConnectionManager, optional): The connection manager to use.
                Defaults to a new ConnectionManager instance.
        """
        if not connection_manager:
            connection_manager = ConnectionManager()
        super().init_components(connection_manager)

        if self.document_retriever is not None:
            # A component was supplied or already created; keep it.
            return

        self.document_retriever = WeaviateDocumentRetrieverComponent(
            vector_store=self.vector_store,
            filters=self.filters,
            top_k=self.top_k,
        )

    def execute(self, input_data: RetrieverInputSchema, config: RunnableConfig = None, **kwargs) -> dict[str, Any]:
        """
        Run document retrieval for the given input.

        Per-request ``filters``, ``top_k`` and ``alpha`` take precedence over the
        node-level values. The fallback uses ``or``, so any falsy request value
        (e.g. ``0``, ``0.0``, ``{}`` or ``None``) selects the node-level value.

        Args:
            input_data (RetrieverInputSchema): The input data containing the query embedding.
            config (RunnableConfig, optional): The configuration for the execution. Defaults to None.
            **kwargs: Additional keyword arguments passed to the node-execute callback hook.

        Returns:
            dict[str, Any]: A dictionary with the retrieved documents under the "documents" key.
        """
        run_config = ensure_config(config)
        self.run_on_node_execute_run(run_config.callbacks, **kwargs)

        search_options = {
            "filters": input_data.filters or self.filters,
            "top_k": input_data.top_k or self.top_k,
            "content_key": input_data.content_key,
            "query": input_data.query,
            "alpha": input_data.alpha or self.alpha,
        }
        result = self.document_retriever.run(input_data.embedding, **search_options)

        return {"documents": result["documents"]}

__init__(**kwargs)

Initialize the WeaviateDocumentRetriever.

If neither vector_store nor connection is provided in kwargs, a default Weaviate connection will be created.

Parameters:

Name Type Description Default
**kwargs

Keyword arguments to initialize the retriever.

{}
Source code in dynamiq/nodes/retrievers/weaviate.py
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(self, **kwargs):
    """
    Build the retriever, falling back to a default Weaviate connection.

    A default ``Weaviate()`` connection is injected only when both
    ``vector_store`` and ``connection`` are missing from kwargs (or None).

    Args:
        **kwargs: Keyword arguments forwarded to the parent initializer.
    """
    store_given = kwargs.get("vector_store") is not None
    if not store_given and kwargs.get("connection") is None:
        kwargs["connection"] = Weaviate()
    super().__init__(**kwargs)

execute(input_data, config=None, **kwargs)

Execute the document retrieval process.

This method retrieves documents based on the input embedding.

Parameters:

Name Type Description Default
input_data RetrieverInputSchema

The input data containing the query embedding.

required
config RunnableConfig

The configuration for the execution. Defaults to None.

None
**kwargs

Additional keyword arguments.

{}

Returns:

Type Description
dict[str, Any]

A dictionary containing the retrieved documents.

Source code in dynamiq/nodes/retrievers/weaviate.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def execute(self, input_data: RetrieverInputSchema, config: RunnableConfig = None, **kwargs) -> dict[str, Any]:
    """
    Run document retrieval for the given input.

    Per-request ``filters``, ``top_k`` and ``alpha`` take precedence over the
    node-level values. The fallback uses ``or``, so any falsy request value
    (e.g. ``0``, ``0.0``, ``{}`` or ``None``) selects the node-level value.

    Args:
        input_data (RetrieverInputSchema): The input data containing the query embedding.
        config (RunnableConfig, optional): The configuration for the execution. Defaults to None.
        **kwargs: Additional keyword arguments passed to the node-execute callback hook.

    Returns:
        dict[str, Any]: A dictionary with the retrieved documents under the "documents" key.
    """
    run_config = ensure_config(config)
    self.run_on_node_execute_run(run_config.callbacks, **kwargs)

    search_options = {
        "filters": input_data.filters or self.filters,
        "top_k": input_data.top_k or self.top_k,
        "content_key": input_data.content_key,
        "query": input_data.query,
        "alpha": input_data.alpha or self.alpha,
    }
    result = self.document_retriever.run(input_data.embedding, **search_options)

    return {"documents": result["documents"]}

init_components(connection_manager=None)

Initialize the components of the retriever.

This method sets up the document retriever component if it hasn't been initialized yet.

Parameters:

Name Type Description Default
connection_manager ConnectionManager

The connection manager to use. Defaults to a new ConnectionManager instance.

None
Source code in dynamiq/nodes/retrievers/weaviate.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def init_components(self, connection_manager: ConnectionManager | None = None):
    """
    Initialize the components of the retriever.

    Delegates to the parent implementation first, then creates the document
    retriever component unless one is already set.

    Args:
        connection_manager (ConnectionManager, optional): The connection manager to use.
            Defaults to a new ConnectionManager instance.
    """
    if not connection_manager:
        connection_manager = ConnectionManager()
    super().init_components(connection_manager)

    if self.document_retriever is not None:
        # A component was supplied or already created; keep it.
        return

    self.document_retriever = WeaviateDocumentRetrieverComponent(
        vector_store=self.vector_store,
        filters=self.filters,
        top_k=self.top_k,
    )