Bases: ConnectionNode
A tool for scraping web pages, powered by ZenRows.
This class is responsible for scraping the content of a web page using ZenRows.
Source code in dynamiq/nodes/tools/zenrows.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
class ZenRowsTool(ConnectionNode):
    """A tool for scraping web pages, powered by ZenRows.

    Sends the target URL to the ZenRows API endpoint configured on the
    connection and returns the scraped page content, either as Markdown
    (default) or as raw HTML.
    """

    group: Literal[NodeGroup.TOOLS] = NodeGroup.TOOLS
    name: str = "Zenrows Scraper Tool"
    description: str = DESCRIPTION_ZENROWS
    connection: ZenRows
    # Optional default URL; the per-call URL comes from the input schema.
    url: str | None = None
    markdown_response: bool = Field(
        default=True,
        description="If True, the content will be parsed as Markdown instead of HTML.",
    )

    model_config = ConfigDict(arbitrary_types_allowed=True)
    input_schema: ClassVar[type[ZenRowsInputSchema]] = ZenRowsInputSchema

    def execute(self, input_data: ZenRowsInputSchema, config: RunnableConfig = None, **kwargs) -> dict[str, Any]:
        """Execute the web scraping process.

        Args:
            input_data (ZenRowsInputSchema): Input schema instance whose ``url``
                field is the page to scrape.
            config (RunnableConfig, optional): Configuration for the runnable,
                including callbacks.
            **kwargs: Additional arguments passed to the execution context.

        Returns:
            dict[str, Any]: ``{"content": result}`` where ``result`` is an
            agent-friendly Markdown string when ``is_optimized_for_agents`` is
            set, otherwise ``{"url": ..., "content": ...}``.

        Raises:
            ToolExecutionException: If the request to ZenRows fails (marked
                recoverable so an agent may retry).
        """
        logger.info(f"Tool {self.name} - {self.id}: started with input:\n{input_data.model_dump()}")

        # Ensure the config is set up correctly
        config = ensure_config(config)
        self.run_on_node_execute_run(config.callbacks, **kwargs)

        params = {
            "url": input_data.url,
            # ZenRows expects a lowercase string flag ("true"/"false"), not a Python bool.
            "markdown_response": str(self.markdown_response).lower(),
        }

        try:
            response = self.client.request(
                method=self.connection.method,
                url=self.connection.url,
                params={**self.connection.params, **params},
            )
            response.raise_for_status()
            scrape_result = response.text
        except Exception as e:
            logger.error(f"Tool {self.name} - {self.id}: failed to get results. Error: {e}")
            # Chain the original exception so the root cause stays visible.
            raise ToolExecutionException(
                f"Tool '{self.name}' failed to execute the requested action. "
                f"Error: {str(e)}. Please analyze the error and take appropriate action.",
                recoverable=True,
            ) from e

        if self.is_optimized_for_agents:
            result = f"## Source URL\n{input_data.url}\n\n## Scraped Result\n\n{scrape_result}\n"
        else:
            result = {"url": input_data.url, "content": scrape_result}

        logger.info(f"Tool {self.name} - {self.id}: finished with result:\n{str(result)[:200]}...")
        return {"content": result}
|
Executes the web scraping process.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_data` | `ZenRowsInputSchema` | Input schema instance whose `url` field is the page to scrape. | *required* |
| `config` | `RunnableConfig` | Configuration for the runnable, including callbacks. | `None` |
| `**kwargs` | | Additional arguments passed to the execution context. | `{}` |

Returns:

| Type | Description |
| --- | --- |
| `dict[str, Any]` | A dictionary containing the URL and the scraped content. |
Source code in dynamiq/nodes/tools/zenrows.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def execute(self, input_data: ZenRowsInputSchema, config: RunnableConfig = None, **kwargs) -> dict[str, Any]:
    """Execute the web scraping process.

    Args:
        input_data (ZenRowsInputSchema): Input schema instance whose ``url``
            field is the page to scrape.
        config (RunnableConfig, optional): Configuration for the runnable,
            including callbacks.
        **kwargs: Additional arguments passed to the execution context.

    Returns:
        dict[str, Any]: ``{"content": result}`` where ``result`` is an
        agent-friendly Markdown string when ``is_optimized_for_agents`` is
        set, otherwise ``{"url": ..., "content": ...}``.

    Raises:
        ToolExecutionException: If the request to ZenRows fails (marked
            recoverable so an agent may retry).
    """
    logger.info(f"Tool {self.name} - {self.id}: started with input:\n{input_data.model_dump()}")

    # Ensure the config is set up correctly
    config = ensure_config(config)
    self.run_on_node_execute_run(config.callbacks, **kwargs)

    params = {
        "url": input_data.url,
        # ZenRows expects a lowercase string flag ("true"/"false"), not a Python bool.
        "markdown_response": str(self.markdown_response).lower(),
    }

    try:
        response = self.client.request(
            method=self.connection.method,
            url=self.connection.url,
            params={**self.connection.params, **params},
        )
        response.raise_for_status()
        scrape_result = response.text
    except Exception as e:
        logger.error(f"Tool {self.name} - {self.id}: failed to get results. Error: {e}")
        # Chain the original exception so the root cause stays visible.
        raise ToolExecutionException(
            f"Tool '{self.name}' failed to execute the requested action. "
            f"Error: {str(e)}. Please analyze the error and take appropriate action.",
            recoverable=True,
        ) from e

    if self.is_optimized_for_agents:
        result = f"## Source URL\n{input_data.url}\n\n## Scraped Result\n\n{scrape_result}\n"
    else:
        result = {"url": input_data.url, "content": scrape_result}

    logger.info(f"Tool {self.name} - {self.id}: finished with result:\n{str(result)[:200]}...")
    return {"content": result}
|