Skip to content

Commit b06937b

Browse files
authored
feat: add chrome BrowserType for PlaywrightCrawler to use the Chrome browser (#1487)
### Description

- Add `chrome` `BrowserType` for `PlaywrightCrawler` to use the Chrome browser.

### Issues

- Closes: #1071
1 parent 90dde0e commit b06937b

File tree

8 files changed

+44
-16
lines changed

8 files changed

+44
-16
lines changed

docs/examples/code_examples/using_browser_profiles_chrome.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,13 @@ async def main() -> None:
2727

2828
crawler = PlaywrightCrawler(
2929
headless=False,
30-
# Use chromium for Chrome compatibility
31-
browser_type='chromium',
30+
# Use the installed Chrome browser
31+
browser_type='chrome',
3232
# Disable fingerprints to preserve profile identity
3333
fingerprint_generator=None,
3434
# Set user data directory to temp folder
3535
user_data_dir=tmp_profile_dir,
3636
browser_launch_options={
37-
# Use installed Chrome browser
38-
'channel': 'chrome',
3937
# Slow down actions to mimic human behavior
4038
'slow_mo': 200,
4139
'args': [

docs/examples/using_browser_profile.mdx

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ Using browser profiles allows you to leverage existing login sessions, saved pas
1818

1919
To run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> with your Chrome profile, you need to know the path to your profile files. You can find this information by entering `chrome://version/` as a URL in your Chrome browser. If you have multiple profiles, pay attention to the profile name - if you only have one profile, it's always `Default`.
2020

21-
You also need to use the [`channel`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-option-channel) parameter in `browser_launch_options` to use the Chrome browser installed on your system instead of Playwright's Chromium.
22-
2321
:::warning Profile access limitation
2422
Due to [Chrome's security policies](https://developer.chrome.com/blog/remote-debugging-port), automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
2523
:::

src/crawlee/browsers/_browser_pool.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,10 @@ def with_default_plugin(
118118
"""Initialize a new instance with a single `PlaywrightBrowserPlugin` configured with the provided options.
119119
120120
Args:
121-
browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
121+
browser_type: The type of browser to launch:
122+
- 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
123+
- 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
124+
the system.
122125
user_data_dir: Path to a user data directory, which stores browser session data like cookies
123126
and local storage.
124127
browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided

src/crawlee/browsers/_playwright_browser_plugin.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
3434
3535
It is a plugin designed to manage browser instances using the Playwright automation library. It acts as a factory
3636
for creating new browser instances and provides a unified interface for interacting with different browser types
37-
(chromium, firefox, and webkit). This class integrates configuration options for browser launches (headless mode,
38-
executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
37+
(chromium, firefox, webkit and chrome). This class integrates configuration options for browser launches (headless
38+
mode, executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
3939
browser instance, ensuring that resource limits are respected.
4040
"""
4141

@@ -55,7 +55,10 @@ def __init__(
5555
"""Initialize a new instance.
5656
5757
Args:
58-
browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
58+
browser_type: The type of browser to launch:
59+
- 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
60+
- 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
61+
the system.
5962
user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local
6063
storage.
6164
browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
@@ -80,6 +83,17 @@ def __init__(
8083
'chromium_sandbox': not config.disable_browser_sandbox,
8184
}
8285

86+
if browser_type == 'chrome' and default_launch_browser_options['executable_path']:
87+
raise ValueError(
88+
'Cannot use browser_type `chrome` with `Configuration.default_browser_path` or `executable_path` set.'
89+
)
90+
91+
# Map 'chrome' to 'chromium' with the 'chrome' channel.
92+
if browser_type == 'chrome':
93+
browser_type = 'chromium'
94+
# Chromium parameter 'channel' set to 'chrome' enables using installed Google Chrome.
95+
default_launch_browser_options['channel'] = 'chrome'
96+
8397
self._browser_type: BrowserType = browser_type
8498
self._browser_launch_options: dict[str, Any] = default_launch_browser_options | (browser_launch_options or {})
8599
self._browser_new_context_options = browser_new_context_options or {}

src/crawlee/browsers/_types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
if TYPE_CHECKING:
77
from playwright.async_api import Page
88

9-
BrowserType = Literal['chromium', 'firefox', 'webkit']
9+
BrowserType = Literal['chromium', 'firefox', 'webkit', 'chrome']
1010

1111

1212
@dataclass

src/crawlee/crawlers/_playwright/_playwright_crawler.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,10 @@ def __init__(
114114
browser_pool: A `BrowserPool` instance to be used for launching the browsers and getting pages.
115115
user_data_dir: Path to a user data directory, which stores browser session data like cookies
116116
and local storage.
117-
browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
117+
browser_type: The type of browser to launch:
118+
- 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
119+
- 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
120+
the system.
118121
This option should not be used if `browser_pool` is provided.
119122
browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
120123
directly to Playwright's `browser_type.launch` method. For more details, refer to the
@@ -153,7 +156,7 @@ def __init__(
153156
):
154157
raise ValueError(
155158
'You cannot provide `headless`, `browser_type`, `browser_launch_options`, '
156-
'`browser_new_context_options`, `use_incognito_pages`, `user_data_dir` or'
159+
'`browser_new_context_options`, `use_incognito_pages`, `user_data_dir` or '
157160
'`fingerprint_generator` arguments when `browser_pool` is provided.'
158161
)
159162

@@ -496,7 +499,9 @@ class _PlaywrightCrawlerAdditionalOptions(TypedDict):
496499
"""A `BrowserPool` instance to be used for launching the browsers and getting pages."""
497500

498501
browser_type: NotRequired[BrowserType]
499-
"""The type of browser to launch ('chromium', 'firefox', or 'webkit').
502+
"""The type of browser to launch:
503+
- 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
504+
- 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on the system.
500505
This option should not be used if `browser_pool` is provided."""
501506

502507
browser_launch_options: NotRequired[Mapping[str, Any]]

src/crawlee/fingerprint_suite/_header_generator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111

1212

1313
def fingerprint_browser_type_from_playwright_browser_type(
14-
playwright_browser_type: Literal['chromium', 'firefox', 'webkit'],
14+
playwright_browser_type: Literal['chromium', 'firefox', 'webkit', 'chrome'],
1515
) -> SupportedBrowserType:
16-
if playwright_browser_type == 'chromium':
16+
if playwright_browser_type in {'chromium', 'chrome'}:
1717
return 'chrome'
1818
if playwright_browser_type == 'firefox':
1919
return 'firefox'

tests/unit/browsers/test_playwright_browser_plugin.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,13 @@ async def test_methods_raise_error_when_not_active() -> None:
6969

7070
async with plugin:
7171
assert plugin.active is True
72+
73+
74+
async def raise_error_if_chrome_and_executable_path() -> None:
75+
with pytest.raises(
76+
ValueError, match=r'Cannot use browser_type `chrome` with `Configuration.default_browser_path` or `executable_path` set.'
77+
):
78+
PlaywrightBrowserPlugin(
79+
browser_type='chrome',
80+
browser_launch_options={'executable_path': '/path/to/chrome'},
81+
)

0 commit comments

Comments
 (0)