Commit a2e8496

docs: Set line length to docs related code to 90 (#973)
### Description

Set the line length for docs-related code to 90 so that each code example is fully visible without a horizontal scrollbar. Update the existing examples to be compliant.

### Issues

- Closes: #970
1 parent c33b34d commit a2e8496

25 files changed: +146 −53 lines
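
The lint configuration that enforces the new limit is not part of the excerpt below. The following is a rough sketch of how a docs-only line-length override can be expressed with ruff (the linter and formatter Crawlee uses), assuming a hypothetical `docs/ruff.toml`; the actual commit may configure this differently:

```toml
# docs/ruff.toml -- hypothetical sketch, not taken from this commit.
# Ruff resolves the closest configuration file, so these settings apply
# only to code under docs/, leaving the rest of the project unaffected.
extend = "../pyproject.toml"  # inherit the project-wide ruff settings
line-length = 90              # docs code examples must fit in 90 columns
```

With an override like this, `ruff format docs/` would wrap the docs examples at 90 columns, and `ruff check docs/` (with the `E501` rule enabled) would flag any longer line, matching the wrapped examples below.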

docs/examples/code/adaptive_playwright_crawler.py

Lines changed: 8 additions & 4 deletions

@@ -15,7 +15,9 @@ async def main() -> None:
     )

     @crawler.router.handler(label='label')
-    async def request_handler_for_label(context: AdaptivePlaywrightCrawlingContext) -> None:
+    async def request_handler_for_label(
+        context: AdaptivePlaywrightCrawlingContext,
+    ) -> None:
         # Do some processing using `page`
         some_locator = context.page.locator('div').first
         await some_locator.wait_for()
@@ -35,8 +37,8 @@ async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None:
     @crawler.pre_navigation_hook
     async def hook(context: AdaptivePlaywrightPreNavCrawlingContext) -> None:
         """Hook executed both in static sub crawler and playwright sub crawler."""
-        # Trying to access context.page in this hook would raise `AdaptiveContextError` for pages crawled
-        # without playwright.
+        # Trying to access context.page in this hook would raise `AdaptiveContextError`
+        # for pages crawled without playwright.
         context.log.info(f'pre navigation hook for: {context.request.url} ...')

     @crawler.pre_navigation_hook(playwright_only=True)
@@ -47,7 +49,9 @@ async def some_routing_function(route: Route) -> None:
            await route.continue_()

        await context.page.route('*/**', some_routing_function)
-        context.log.info(f'Playwright only pre navigation hook for: {context.request.url} ...')
+        context.log.info(
+            f'Playwright only pre navigation hook for: {context.request.url} ...'
+        )

     # Run the crawler with the initial list of URLs.
     await crawler.run(['https://warehouse-theme-metal.myshopify.com/'])

docs/examples/code/beautifulsoup_crawler.py

Lines changed: 5 additions & 1 deletion

@@ -1,7 +1,11 @@
 import asyncio
 from datetime import timedelta

-from crawlee.crawlers import BasicCrawlingContext, BeautifulSoupCrawler, BeautifulSoupCrawlingContext
+from crawlee.crawlers import (
+    BasicCrawlingContext,
+    BeautifulSoupCrawler,
+    BeautifulSoupCrawlingContext,
+)


 async def main() -> None:

docs/examples/code/beautifulsoup_crawler_keep_alive.py

Lines changed: 17 additions & 8 deletions

@@ -6,20 +6,25 @@

 async def main() -> None:
     crawler = BeautifulSoupCrawler(
-        # Keep the crawler alive even when there are no requests to be processed at the moment.
+        # Keep the crawler alive even when there are no requests to be processed now.
         keep_alive=True,
     )

     def stop_crawler_if_url_visited(context: BasicCrawlingContext) -> None:
-        """Stop crawler once specific url is visited. Just an example of guard condition to stop the crawler."""
+        """Stop crawler once specific url is visited.
+
+        Example of guard condition to stop the crawler."""
         if context.request.url == 'https://crawlee.dev/docs/examples':
-            crawler.stop('Stop crawler that was in keep_alive state after specific url was visited')
+            crawler.stop(
+                'Stop crawler that was in keep_alive state after specific url was visited'
+            )
         else:
             context.log.info('keep_alive=True, waiting for more requests to come.')

     async def add_request_later(url: str, after_s: int) -> None:
-        """Add requests to the queue after some time. This can be done by external code."""
-        # Just an example of request being added to the crawler later, when it is waiting due to `keep_alive=True`.
+        """Add requests to the queue after some time. Can be done by external code."""
+        # Just an example of request being added to the crawler later,
+        # when it is waiting due to `keep_alive=True`.
         await asyncio.sleep(after_s)
         await crawler.add_requests([url])

@@ -33,11 +38,15 @@ async def request_handler(context: BasicCrawlingContext) -> None:

     # Start some tasks that will add some requests later to simulate real situation,
     # where requests are added later by external code.
-    add_request_later_task1 = asyncio.create_task(add_request_later(url='https://crawlee.dev', after_s=1))
-    add_request_later_task2 = asyncio.create_task(add_request_later(url='https://crawlee.dev/docs/examples', after_s=5))
+    add_request_later_task1 = asyncio.create_task(
+        add_request_later(url='https://crawlee.dev', after_s=1)
+    )
+    add_request_later_task2 = asyncio.create_task(
+        add_request_later(url='https://crawlee.dev/docs/examples', after_s=5)
+    )

     # Run the crawler without the initial list of requests.
-    # It will wait for more requests to be added to the queue later due to `keep_alive=True`.
+    # Wait for more requests to be added to the queue later due to `keep_alive=True`.
     await crawler.run()

     await asyncio.gather(add_request_later_task1, add_request_later_task2)

docs/examples/code/beautifulsoup_crawler_stop.py

Lines changed: 3 additions & 1 deletion

@@ -20,7 +20,9 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:

         # Create custom condition to stop crawler once it finds what it is looking for.
         if 'crawlee' in context.request.url:
-            crawler.stop(reason='Manual stop of crawler after finding `crawlee` in the url.')
+            crawler.stop(
+                reason='Manual stop of crawler after finding `crawlee` in the url.'
+            )

         # Extract data from the page.
         data = {

docs/examples/code/crawl_website_with_relative_links_all_links.py

Lines changed: 2 additions & 2 deletions

@@ -15,8 +15,8 @@ async def main() -> None:
     async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
         context.log.info(f'Processing {context.request.url} ...')

-        # Enqueue all links found on the page. Any URLs found will be matched by this strategy,
-        # even if they go off the site you are currently crawling.
+        # Enqueue all links found on the page. Any URLs found will be matched by
+        # this strategy, even if they go off the site you are currently crawling.
         await context.enqueue_links(strategy=EnqueueStrategy.ALL)

     # Run the crawler with the initial list of requests.

docs/examples/code/playwright_block_requests.py

Lines changed: 5 additions & 1 deletion

@@ -1,6 +1,10 @@
 import asyncio

-from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext, PlaywrightPreNavCrawlingContext
+from crawlee.crawlers import (
+    PlaywrightCrawler,
+    PlaywrightCrawlingContext,
+    PlaywrightPreNavCrawlingContext,
+)


 async def main() -> None:

docs/examples/code/playwright_crawler.py

Lines changed: 5 additions & 1 deletion

@@ -1,6 +1,10 @@
 import asyncio

-from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext, PlaywrightPreNavCrawlingContext
+from crawlee.crawlers import (
+    PlaywrightCrawler,
+    PlaywrightCrawlingContext,
+    PlaywrightPreNavCrawlingContext,
+)


 async def main() -> None:

docs/examples/code/playwright_crawler_with_camoufox.py

Lines changed: 16 additions & 7 deletions

@@ -4,31 +4,40 @@
 from camoufox import AsyncNewBrowser
 from typing_extensions import override

-from crawlee.browsers import BrowserPool, PlaywrightBrowserController, PlaywrightBrowserPlugin
+from crawlee.browsers import (
+    BrowserPool,
+    PlaywrightBrowserController,
+    PlaywrightBrowserPlugin,
+)
 from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext


 class CamoufoxPlugin(PlaywrightBrowserPlugin):
-    """Example browser plugin that uses Camoufox browser, but otherwise keeps the functionality of
-    PlaywrightBrowserPlugin."""
+    """Example browser plugin that uses Camoufox browser,
+    but otherwise keeps the functionality of PlaywrightBrowserPlugin.
+    """

     @override
     async def new_browser(self) -> PlaywrightBrowserController:
         if not self._playwright:
             raise RuntimeError('Playwright browser plugin is not initialized.')

         return PlaywrightBrowserController(
-            browser=await AsyncNewBrowser(self._playwright, **self._browser_launch_options),
-            max_open_pages_per_browser=1,  # Increase, if camoufox can handle it in your use case.
-            header_generator=None,  # This turns off the crawlee header_generation. Camoufox has its own.
+            browser=await AsyncNewBrowser(
+                self._playwright, **self._browser_launch_options
+            ),
+            # Increase, if camoufox can handle it in your use case.
+            max_open_pages_per_browser=1,
+            # This turns off the crawlee header_generation. Camoufox has its own.
+            header_generator=None,
         )


 async def main() -> None:
     crawler = PlaywrightCrawler(
         # Limit the crawl to max requests. Remove or increase it for crawling all links.
         max_requests_per_crawl=10,
-        # Custom browser pool. This gives users full control over browsers used by the crawler.
+        # Custom browser pool. Gives users full control over browsers used by the crawler.
         browser_pool=BrowserPool(plugins=[CamoufoxPlugin()]),
     )

docs/examples/code/playwright_crawler_with_fingerprint_generator.py

Lines changed: 6 additions & 2 deletions

@@ -1,12 +1,16 @@
 import asyncio

 from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
-from crawlee.fingerprint_suite import DefaultFingerprintGenerator, HeaderGeneratorOptions, ScreenOptions
+from crawlee.fingerprint_suite import (
+    DefaultFingerprintGenerator,
+    HeaderGeneratorOptions,
+    ScreenOptions,
+)


 async def main() -> None:
     # Use default fingerprint generator with desired fingerprint options.
-    # Generator will try to generate real looking browser fingerprint based on the options.
+    # Generator will generate real looking browser fingerprint based on the options.
     # Unspecified fingerprint options will be automatically selected by the generator.
     fingerprint_generator = DefaultFingerprintGenerator(
         header_options=HeaderGeneratorOptions(browsers=['chromium']),

docs/guides/code/playwright_crawler/browser_configuration_example.py

Lines changed: 2 additions & 1 deletion

@@ -9,7 +9,8 @@ async def main() -> None:
         browser_type='chromium',
         # Browser launch options
         browser_launch_options={
-            # For support `msedge` channel you need to install it `playwright install msedge`
+            # For support `msedge` channel you need to install it
+            # `playwright install msedge`
             'channel': 'msedge',
             'slow_mo': 200,
         },
