
async def main() -> None:
    crawler = BeautifulSoupCrawler(
-        # Keep the crawler alive even when there are no requests to be processed at the moment.
+        # Keep the crawler alive even when there are no requests to be processed now.
        keep_alive=True,
    )

    def stop_crawler_if_url_visited(context: BasicCrawlingContext) -> None:
-        """Stop crawler once specific url is visited. Just an example of guard condition to stop the crawler."""
+        """Stop crawler once specific url is visited.
+
+        Example of guard condition to stop the crawler."""
        if context.request.url == 'https://crawlee.dev/docs/examples':
-            crawler.stop('Stop crawler that was in keep_alive state after specific url was visited')
+            crawler.stop(
+                'Stop crawler that was in keep_alive state after specific url was visited'
+            )
        else:
            context.log.info('keep_alive=True, waiting for more requests to come.')

    async def add_request_later(url: str, after_s: int) -> None:
-        """Add requests to the queue after some time. This can be done by external code."""
-        # Just an example of request being added to the crawler later, when it is waiting due to `keep_alive=True`.
+        """Add requests to the queue after some time. Can be done by external code."""
+        # Just an example of request being added to the crawler later,
+        # when it is waiting due to `keep_alive=True`.
        await asyncio.sleep(after_s)
        await crawler.add_requests([url])

@@ -33,11 +38,15 @@ async def request_handler(context: BasicCrawlingContext) -> None:

    # Start some tasks that will add some requests later to simulate real situation,
    # where requests are added later by external code.
-    add_request_later_task1 = asyncio.create_task(add_request_later(url='https://crawlee.dev', after_s=1))
-    add_request_later_task2 = asyncio.create_task(add_request_later(url='https://crawlee.dev/docs/examples', after_s=5))
+    add_request_later_task1 = asyncio.create_task(
+        add_request_later(url='https://crawlee.dev', after_s=1)
+    )
+    add_request_later_task2 = asyncio.create_task(
+        add_request_later(url='https://crawlee.dev/docs/examples', after_s=5)
+    )

    # Run the crawler without the initial list of requests.
-    # It will wait for more requests to be added to the queue later due to `keep_alive=True`.
+    # Wait for more requests to be added to the queue later due to `keep_alive=True`.
    await crawler.run()

    await asyncio.gather(add_request_later_task1, add_request_later_task2)
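The pattern this diff exercises, run() blocking indefinitely because of keep_alive=True while external tasks enqueue work later and a guard condition calls stop(), can be reproduced library-free with plain asyncio. A minimal sketch; every name in it is illustrative, none of it is crawlee API:

import asyncio


async def consume(queue: asyncio.Queue, stop_url: str) -> None:
    """Process items until the guard condition is hit (mirrors keep_alive + crawler.stop())."""
    while True:
        url = await queue.get()  # Waits indefinitely for work, like keep_alive=True.
        print(f'processing {url}')
        if url == stop_url:
            print('guard condition hit, stopping')  # Mirrors crawler.stop(...).
            break


async def produce_later(queue: asyncio.Queue, url: str, after_s: float) -> None:
    """Enqueue an item after a delay, like add_request_later() in the diff."""
    await asyncio.sleep(after_s)
    await queue.put(url)


async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    producers = [
        asyncio.create_task(produce_later(queue, 'https://crawlee.dev', after_s=1)),
        asyncio.create_task(produce_later(queue, 'https://crawlee.dev/docs/examples', after_s=5)),
    ]
    # Start consuming with an empty queue, as the diff runs the crawler
    # without an initial list of requests.
    await consume(queue, stop_url='https://crawlee.dev/docs/examples')
    await asyncio.gather(*producers)


if __name__ == '__main__':
    asyncio.run(main())

This is also why the diff can call crawler.run() with no initial requests: the keep-alive wait gives the delayed producers time to enqueue work, and the explicit stop() call, like the break above, is the only way out of that wait.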