Skip to content

Weaviate

WeaviateDocumentRetriever

Bases: Retriever, WeaviateRetrieverVectorStoreParams

Document Retriever using Weaviate.

This class implements a document retriever that uses Weaviate as the vector store backend.

Parameters:

Name Type Description Default
vector_store WeaviateVectorStore

An instance of WeaviateVectorStore to interface with Weaviate vectors.

None
filters dict[str, Any]

Filters to apply for retrieving specific documents. Defaults to None.

None
top_k int

The maximum number of documents to return. Defaults to 10.

10

Attributes:

Name Type Description
group Literal[RETRIEVERS]

The group of the node.

name str

The name of the node.

vector_store WeaviateVectorStore | None

The WeaviateVectorStore instance.

filters dict[str, Any] | None

Filters for document retrieval.

top_k int

The maximum number of documents to return.

document_retriever WeaviateDocumentRetrieverComponent | None

The document retriever component.

Source code in dynamiq/nodes/retrievers/weaviate.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
class WeaviateDocumentRetriever(Retriever, WeaviateRetrieverVectorStoreParams):
    """Document Retriever using Weaviate.

    This class implements a document retriever that uses Weaviate as the vector store backend.

    Args:
        vector_store (WeaviateVectorStore, optional): An instance of WeaviateVectorStore to interface
            with Weaviate vectors.
        connection (Weaviate, optional): Connection handed to the vector store. A default
            ``Weaviate()`` connection is created when neither ``vector_store`` nor ``connection``
            is supplied.
        filters (dict[str, Any], optional): Filters to apply for retrieving specific documents.
            Defaults to None.
        top_k (int, optional): The maximum number of documents to return. Defaults to 10.

    Attributes:
        group (Literal[NodeGroup.RETRIEVERS]): The group of the node.
        name (str): The name of the node.
        connection (Weaviate | None): The Weaviate connection.
        vector_store (WeaviateVectorStore | None): The WeaviateVectorStore instance.
        filters (dict[str, Any] | None): Filters for document retrieval.
        top_k (int): The maximum number of documents to return.
        document_retriever (WeaviateDocumentRetrieverComponent | None): The document retriever
            component; created by ``init_components`` when not provided.
    """

    name: str = "WeaviateDocumentRetriever"
    connection: Weaviate | None = None
    vector_store: WeaviateVectorStore | None = None
    document_retriever: WeaviateDocumentRetrieverComponent | None = None

    def __init__(self, **kwargs):
        """
        Initialize the WeaviateDocumentRetriever.

        If neither vector_store nor connection is provided in kwargs, a default Weaviate connection will be created.

        Args:
            **kwargs: Keyword arguments to initialize the retriever.
        """
        if kwargs.get("vector_store") is None and kwargs.get("connection") is None:
            kwargs["connection"] = Weaviate()
        super().__init__(**kwargs)

    @property
    def vector_store_cls(self):
        """Return the vector store class used by this retriever (``WeaviateVectorStore``)."""
        return WeaviateVectorStore

    @property
    def vector_store_params(self):
        """Build the keyword arguments for the vector store.

        Returns:
            dict: The dumped values of the fields declared on
                ``WeaviateRetrieverVectorStoreParams``, plus ``connection`` and ``client``.
        """
        params = self.model_dump(include=set(WeaviateRetrieverVectorStoreParams.model_fields))
        params.update(
            {
                "connection": self.connection,
                # NOTE(review): `client` is not declared in this class; presumably provided by a base class.
                "client": self.client,
            }
        )
        return params

    def init_components(self, connection_manager: ConnectionManager | None = None):
        """
        Initialize the components of the retriever.

        This method sets up the document retriever component if it hasn't been initialized yet.

        Args:
            connection_manager (ConnectionManager, optional): The connection manager to use.
                Defaults to a new ConnectionManager instance.
        """
        connection_manager = connection_manager or ConnectionManager()
        super().init_components(connection_manager)
        if self.document_retriever is None:
            self.document_retriever = WeaviateDocumentRetrieverComponent(
                vector_store=self.vector_store,
                filters=self.filters,
                top_k=self.top_k,
                similarity_threshold=self.similarity_threshold,
            )

    def execute(self, input_data: RetrieverInputSchema, config: RunnableConfig = None, **kwargs) -> dict[str, Any]:
        """
        Execute the document retrieval process.

        This method retrieves documents based on the input embedding. Values supplied on
        ``input_data`` take precedence over the values configured on the node.

        Args:
            input_data (RetrieverInputSchema): The input data containing the query embedding and
                optional per-call overrides (``filters``, ``top_k``, ``similarity_threshold``,
                ``alpha``, ``query``, ``content_key``).
            config (RunnableConfig, optional): The configuration for the execution. Defaults to None.
            **kwargs: Additional keyword arguments.

        Returns:
            dict[str, Any]: A dictionary containing the retrieved documents under the
                ``"documents"`` key.
        """
        config = ensure_config(config)
        self.run_on_node_execute_run(config.callbacks, **kwargs)

        query_embedding = input_data.embedding
        content_key = input_data.content_key
        filters = input_data.filters or self.filters
        top_k = input_data.top_k or self.top_k
        similarity_threshold = (
            input_data.similarity_threshold
            if input_data.similarity_threshold is not None
            else self.similarity_threshold
        )

        # Explicit None check rather than `or`: alpha=0.0 is falsy but is a meaningful
        # request (Weaviate hybrid search treats it as pure keyword search), so it must
        # not be replaced by the node-level default.
        alpha = input_data.alpha if input_data.alpha is not None else self.alpha
        query = input_data.query

        output = self.document_retriever.run(
            query_embedding,
            filters=filters,
            top_k=top_k,
            content_key=content_key,
            query=query,
            alpha=alpha,
            similarity_threshold=similarity_threshold,
        )

        return {
            "documents": output["documents"],
        }

__init__(**kwargs)

Initialize the WeaviateDocumentRetriever.

If neither vector_store nor connection is provided in kwargs, a default Weaviate connection will be created.

Parameters:

Name Type Description Default
**kwargs

Keyword arguments to initialize the retriever.

{}
Source code in dynamiq/nodes/retrievers/weaviate.py
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(self, **kwargs):
    """
    Construct the retriever node from keyword arguments.

    When the caller passes neither a ``vector_store`` nor a ``connection`` (or passes them
    as None), a default ``Weaviate()`` connection is injected into ``kwargs`` before they
    are forwarded to the parent initializer.

    Args:
        **kwargs: Keyword arguments to initialize the retriever.
    """
    has_vector_store = kwargs.get("vector_store") is not None
    has_connection = kwargs.get("connection") is not None
    if not (has_vector_store or has_connection):
        # Nothing to connect with was supplied: fall back to the default connection.
        kwargs["connection"] = Weaviate()
    super().__init__(**kwargs)

execute(input_data, config=None, **kwargs)

Execute the document retrieval process.

This method retrieves documents based on the input embedding.

Parameters:

Name Type Description Default
input_data RetrieverInputSchema

The input data containing the query embedding.

required
config RunnableConfig

The configuration for the execution. Defaults to None.

None
**kwargs

Additional keyword arguments.

{}

Returns:

Type Description
dict[str, Any]

A dictionary containing the retrieved documents under the "documents" key.

Source code in dynamiq/nodes/retrievers/weaviate.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def execute(self, input_data: RetrieverInputSchema, config: RunnableConfig = None, **kwargs) -> dict[str, Any]:
    """
    Execute the document retrieval process.

    This method retrieves documents based on the input embedding. Values supplied on
    ``input_data`` take precedence over the values configured on the node.

    Args:
        input_data (RetrieverInputSchema): The input data containing the query embedding and
            optional per-call overrides (``filters``, ``top_k``, ``similarity_threshold``,
            ``alpha``, ``query``, ``content_key``).
        config (RunnableConfig, optional): The configuration for the execution. Defaults to None.
        **kwargs: Additional keyword arguments.

    Returns:
        dict[str, Any]: A dictionary containing the retrieved documents under the
            ``"documents"`` key.
    """
    config = ensure_config(config)
    self.run_on_node_execute_run(config.callbacks, **kwargs)

    query_embedding = input_data.embedding
    content_key = input_data.content_key
    filters = input_data.filters or self.filters
    top_k = input_data.top_k or self.top_k
    similarity_threshold = (
        input_data.similarity_threshold
        if input_data.similarity_threshold is not None
        else self.similarity_threshold
    )

    # Explicit None check rather than `or`: alpha=0.0 is falsy but is a meaningful
    # request (Weaviate hybrid search treats it as pure keyword search), so it must
    # not be replaced by the node-level default.
    alpha = input_data.alpha if input_data.alpha is not None else self.alpha
    query = input_data.query

    output = self.document_retriever.run(
        query_embedding,
        filters=filters,
        top_k=top_k,
        content_key=content_key,
        query=query,
        alpha=alpha,
        similarity_threshold=similarity_threshold,
    )

    return {
        "documents": output["documents"],
    }

init_components(connection_manager=None)

Initialize the components of the retriever.

This method sets up the document retriever component if it hasn't been initialized yet.

Parameters:

Name Type Description Default
connection_manager ConnectionManager

The connection manager to use. Defaults to a new ConnectionManager instance.

None
Source code in dynamiq/nodes/retrievers/weaviate.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def init_components(self, connection_manager: ConnectionManager | None = None):
    """
    Prepare the retriever's runtime components.

    Delegates to the parent ``init_components`` first, then builds the
    ``WeaviateDocumentRetrieverComponent`` from the node's own settings unless a
    component is already present.

    Args:
        connection_manager (ConnectionManager, optional): The connection manager to use.
            A new ConnectionManager instance is created when none is given.
    """
    if not connection_manager:
        connection_manager = ConnectionManager()
    super().init_components(connection_manager)

    # An existing component is kept as-is.
    if self.document_retriever is not None:
        return

    component_settings = {
        "vector_store": self.vector_store,
        "filters": self.filters,
        "top_k": self.top_k,
        "similarity_threshold": self.similarity_threshold,
    }
    self.document_retriever = WeaviateDocumentRetrieverComponent(**component_settings)