Skip to content

Pinecone

PineconeDocumentRetriever

Document Retriever using Pinecone.

Source code in dynamiq/components/retrievers/pinecone.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
class PineconeDocumentRetriever:
    """
    Document Retriever using Pinecone.

    Holds a `PineconeVectorStore` together with default `filters` and `top_k` values, and exposes `run` to
    query that store with an embedding vector.
    """

    def __init__(
        self,
        *,
        vector_store: PineconeVectorStore,
        filters: dict[str, Any] | None = None,
        top_k: int = 10,
    ):
        """
        Initializes a component for retrieving documents from a Pinecone vector store with optional filtering.

        Args:
            vector_store (PineconeVectorStore): An instance of PineconeVectorStore to interface with Pinecone vectors.
            filters (Optional[dict[str, Any]]): Default filters applied by `run` when the call supplies none.
                `None` is stored as an empty dict. Defaults to None.
            top_k (int): Default maximum number of documents to return. Defaults to 10.

        Raises:
            ValueError: If the `vector_store` is not an instance of `PineconeVectorStore`.
        """
        if not isinstance(vector_store, PineconeVectorStore):
            # The message names the real keyword argument (`vector_store`) so the caller can
            # tell which parameter was rejected.
            msg = "vector_store must be an instance of PineconeVectorStore"
            raise ValueError(msg)

        self.vector_store = vector_store
        self.filters = filters or {}
        self.top_k = top_k

    def run(
        self,
        query_embedding: list[float],
        exclude_document_embeddings: bool = True,
        top_k: int | None = None,
        filters: dict[str, Any] | None = None,
        content_key: str | None = None,
    ) -> dict[str, list[Document]]:
        """
        Retrieves documents from the PineconeVectorStore that are similar to the provided query embedding.

        Args:
            query_embedding (List[float]): The embedding vector of the query for which similar documents are to be
                retrieved.
            exclude_document_embeddings (bool, optional): Specifies whether to exclude the embeddings of the retrieved
                documents from the output. Defaults to True.
            top_k (int, optional): The maximum number of documents to return. Any falsy value (None or 0) falls
                back to the instance's `top_k`. Defaults to None.
            filters (Optional[dict[str, Any]]): Filters to apply for retrieving specific documents. Any falsy value
                (None or an empty dict) falls back to the instance's `filters`. Defaults to None.
            content_key (Optional[str]): The field used to store content in the storage. Passed through to the
                vector store unchanged.

        Returns:
            dict[str, list[Document]]: A dictionary with a single key, "documents", holding the documents returned
            by the vector store (expected to be ordered by relevance to `query_embedding`).
        """
        # Per-call values take precedence; falsy values defer to the defaults captured in __init__.
        top_k = top_k or self.top_k
        filters = filters or self.filters

        docs = self.vector_store._embedding_retrieval(
            query_embedding=query_embedding,
            filters=filters,
            top_k=top_k,
            exclude_document_embeddings=exclude_document_embeddings,
            content_key=content_key,
        )
        logger.debug(f"Retrieved {len(docs)} documents from Pinecone Vector Store.")

        return {"documents": docs}

__init__(*, vector_store, filters=None, top_k=10)

Initializes a component for retrieving documents from a Pinecone vector store with optional filtering.

Parameters:

Name Type Description Default
vector_store PineconeVectorStore

An instance of PineconeVectorStore to interface with Pinecone vectors.

required
filters Optional[dict[str, Any]]

Filters to apply for retrieving specific documents. Defaults to None.

None
top_k int

The maximum number of documents to return. Defaults to 10.

10

Raises:

Type Description
ValueError

If the vector_store is not an instance of PineconeVectorStore.

This initializer checks if the vector_store provided is an instance of the expected PineconeVectorStore class, sets up filtering conditions if any, and defines how many top results to retrieve in document queries.

Source code in dynamiq/components/retrievers/pinecone.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def __init__(
    self,
    *,
    vector_store: PineconeVectorStore,
    filters: dict[str, Any] | None = None,
    top_k: int = 10,
):
    """
    Initializes a component for retrieving documents from a Pinecone vector store with optional filtering.

    Args:
        vector_store (PineconeVectorStore): An instance of PineconeVectorStore to interface with Pinecone vectors.
        filters (Optional[dict[str, Any]]): Filters to apply for retrieving specific documents. `None` is stored
            as an empty dict. Defaults to None.
        top_k (int): The maximum number of documents to return. Defaults to 10.

    Raises:
        ValueError: If the `vector_store` is not an instance of `PineconeVectorStore`.
    """
    if not isinstance(vector_store, PineconeVectorStore):
        # The message names the real keyword argument (`vector_store`) so the caller can
        # tell which parameter was rejected.
        msg = "vector_store must be an instance of PineconeVectorStore"
        raise ValueError(msg)

    self.vector_store = vector_store
    self.filters = filters or {}
    self.top_k = top_k

run(query_embedding, exclude_document_embeddings=True, top_k=None, filters=None, content_key=None)

Retrieves documents from the PineconeVectorStore that are similar to the provided query embedding.

Parameters:

Name Type Description Default
query_embedding List[float]

The embedding vector of the query for which similar documents are to be retrieved.

required
exclude_document_embeddings bool

Specifies whether to exclude the embeddings of the retrieved documents from the output.

True
top_k int

The maximum number of documents to return. Defaults to None.

None
filters Optional[dict[str, Any]]

Filters to apply for retrieving specific documents. Defaults to None.

None
content_key Optional[str]

The field used to store content in the storage.

None

Returns:

Type Description
dict[str, list[Document]]

A dictionary whose "documents" key holds a list of Document instances sorted by their relevance to the query_embedding.

Source code in dynamiq/components/retrievers/pinecone.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def run(
    self,
    query_embedding: list[float],
    exclude_document_embeddings: bool = True,
    top_k: int | None = None,
    filters: dict[str, Any] | None = None,
    content_key: str | None = None,
) -> dict[str, list[Document]]:
    """
    Query the configured vector store with an embedding and return the matching documents.

    Args:
        query_embedding (List[float]): Embedding vector of the query to search with.
        exclude_document_embeddings (bool, optional): Whether the returned documents should omit their
            embeddings. Defaults to True.
        top_k (int, optional): Maximum number of documents to return. A falsy value (None or 0) falls back to
            the instance's `top_k`. Defaults to None.
        filters (Optional[dict[str, Any]]): Filters restricting which documents are considered. A falsy value
            (None or an empty dict) falls back to the instance's `filters`. Defaults to None.
        content_key (Optional[str]): The field used to store content in the storage; forwarded unchanged.

    Returns:
        dict[str, list[Document]]: A dictionary with a single key, "documents", holding the documents returned
        by the vector store.
    """
    # Per-call arguments win; anything falsy defers to the instance-level defaults.
    limit = top_k if top_k else self.top_k
    active_filters = filters if filters else self.filters

    retrieved = self.vector_store._embedding_retrieval(
        query_embedding=query_embedding,
        filters=active_filters,
        top_k=limit,
        exclude_document_embeddings=exclude_document_embeddings,
        content_key=content_key,
    )
    logger.debug(f"Retrieved {len(retrieved)} documents from Pinecone Vector Store.")

    return {"documents": retrieved}