Skip to content

Utils

filter_documents_by_threshold(documents, threshold, *, higher_is_better)

Filter documents by score threshold while preserving order.

Source code in `dynamiq/components/retrievers/utils.py` (lines 8–31):
def filter_documents_by_threshold(
    documents: Iterable[Document],
    threshold: float | None,
    *,
    higher_is_better: bool,
) -> list[Document]:
    """Return the documents whose score passes ``threshold``, in input order.

    Args:
        documents: Documents to filter; each one is read via its ``score``
            attribute.
        threshold: Cut-off value. When ``None`` no filtering is applied and
            all documents are returned as a new list.
        higher_is_better: If true, keep documents with ``score >= threshold``;
            otherwise keep documents with ``score <= threshold``.

    Returns:
        A new list with the retained documents. Documents whose ``score`` is
        ``None`` are always retained.
    """
    if threshold is None:
        return list(documents)

    def _keeps(candidate) -> bool:
        # Unscored documents cannot be compared, so they pass through.
        value = candidate.score
        if value is None:
            return True
        # The boundary is inclusive in both directions.
        return value >= threshold if higher_is_better else value <= threshold

    return [candidate for candidate in documents if _keeps(candidate)]