Skip to content

ZenRows

ZenRowsTool

Bases: ConnectionNode

A tool for scraping web pages, powered by ZenRows.

This class is responsible for scraping the content of a web page using ZenRows.

Source code in dynamiq/nodes/tools/zenrows.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
class ZenRowsTool(ConnectionNode):
    """
    A tool for scraping web pages, powered by ZenRows.

    This class is responsible for scraping the content of a web page using ZenRows.
    """

    group: Literal[NodeGroup.TOOLS] = NodeGroup.TOOLS
    name: str = "Zenrows Scraper Tool"
    description: str = DESCRIPTION_ZENROWS
    connection: ZenRows
    url: str | None = None
    markdown_response: bool = Field(
        default=True,
        description="If True, the content will be parsed as Markdown instead of HTML.",
    )

    model_config = ConfigDict(arbitrary_types_allowed=True)

    input_schema: ClassVar[type[ZenRowsInputSchema]] = ZenRowsInputSchema

    def execute(self, input_data: ZenRowsInputSchema, config: RunnableConfig = None, **kwargs) -> dict[str, Any]:
        """
        Executes the web scraping process.

        Args:
            input_data (ZenRowsInputSchema): Input schema holding the URL to scrape.
            config (RunnableConfig, optional): Configuration for the runnable, including callbacks.
            **kwargs: Additional arguments passed to the execution context.

        Returns:
            dict[str, Any]: A dictionary with a "content" key holding the scraped result.

        Raises:
            ToolExecutionException: If the HTTP request fails or returns an error status.
        """
        logger.info(f"Tool {self.name} - {self.id}: started with input:\n{input_data.model_dump()}")

        # Ensure the config is set up correctly
        config = ensure_config(config)
        self.run_on_node_execute_run(config.callbacks, **kwargs)

        # The ZenRows API expects boolean flags as lowercase strings ("true"/"false").
        params = {
            "url": input_data.url,
            "markdown_response": str(self.markdown_response).lower(),
        }

        try:
            response = self.client.request(
                method=self.connection.method,
                url=self.connection.url,
                params={**self.connection.params, **params},
            )
            response.raise_for_status()
            scrape_result = response.text
        except Exception as e:
            logger.error(f"Tool {self.name} - {self.id}: failed to get results. Error: {e}")
            # Chain the original exception so the root cause is preserved in tracebacks.
            raise ToolExecutionException(
                f"Tool '{self.name}' failed to execute the requested action. "
                f"Error: {str(e)}. Please analyze the error and take appropriate action.",
                recoverable=True,
            ) from e

        if self.is_optimized_for_agents:
            # Agent-optimized output: a single Markdown-formatted string.
            result = f"## Source URL\n{input_data.url}\n\n## Scraped Result\n\n{scrape_result}\n"
        else:
            result = {"url": input_data.url, "content": scrape_result}
        logger.info(f"Tool {self.name} - {self.id}: finished with result:\n{str(result)[:200]}...")
        return {"content": result}

execute(input_data, config=None, **kwargs)

Executes the web scraping process.

Parameters:

Name Type Description Default
input_data ZenRowsInputSchema

The input schema containing the URL to scrape.

required
config RunnableConfig

Configuration for the runnable, including callbacks.

None
**kwargs

Additional arguments passed to the execution context.

{}

Returns:

Type Description
dict[str, Any]

dict[str, Any]: A dictionary with a "content" key holding the scraped result.

Source code in dynamiq/nodes/tools/zenrows.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def execute(self, input_data: ZenRowsInputSchema, config: RunnableConfig = None, **kwargs) -> dict[str, Any]:
    """
    Executes the web scraping process.

    Args:
        input_data (ZenRowsInputSchema): Input schema holding the URL to scrape.
        config (RunnableConfig, optional): Configuration for the runnable, including callbacks.
        **kwargs: Additional arguments passed to the execution context.

    Returns:
        dict[str, Any]: A dictionary with a "content" key holding the scraped result.

    Raises:
        ToolExecutionException: If the HTTP request fails or returns an error status.
    """
    logger.info(f"Tool {self.name} - {self.id}: started with input:\n{input_data.model_dump()}")

    # Ensure the config is set up correctly
    config = ensure_config(config)
    self.run_on_node_execute_run(config.callbacks, **kwargs)

    # The ZenRows API expects boolean flags as lowercase strings ("true"/"false").
    params = {
        "url": input_data.url,
        "markdown_response": str(self.markdown_response).lower(),
    }

    try:
        response = self.client.request(
            method=self.connection.method,
            url=self.connection.url,
            params={**self.connection.params, **params},
        )
        response.raise_for_status()
        scrape_result = response.text
    except Exception as e:
        logger.error(f"Tool {self.name} - {self.id}: failed to get results. Error: {e}")
        # Chain the original exception so the root cause is preserved in tracebacks.
        raise ToolExecutionException(
            f"Tool '{self.name}' failed to execute the requested action. "
            f"Error: {str(e)}. Please analyze the error and take appropriate action.",
            recoverable=True,
        ) from e

    if self.is_optimized_for_agents:
        # Agent-optimized output: a single Markdown-formatted string.
        result = f"## Source URL\n{input_data.url}\n\n## Scraped Result\n\n{scrape_result}\n"
    else:
        result = {"url": input_data.url, "content": scrape_result}
    logger.info(f"Tool {self.name} - {self.id}: finished with result:\n{str(result)[:200]}...")
    return {"content": result}