Skip to content

Commit

Permalink
Merge pull request #291 from slmi/cookies_render
Browse files Browse the repository at this point in the history
[render] add: send session.cookies to render
  • Loading branch information
kennethreitz authored May 31, 2019
2 parents 89fef4c + 8753f79 commit 420fe61
Showing 1 changed file with 73 additions and 5 deletions.
78 changes: 73 additions & 5 deletions requests_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import pyppeteer
import requests
import http.cookiejar
from pyquery import PyQuery

from fake_useragent import UserAgent
Expand Down Expand Up @@ -499,14 +500,19 @@ async def __anext__(self):
def add_next_symbol(self, next_symbol):
    """Append ``next_symbol`` to the ``self.next_symbol`` list."""
    symbols = self.next_symbol
    symbols.append(next_symbol)

async def _async_render(self, *, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int], keep_page: bool):
async def _async_render(self, *, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int], keep_page: bool, cookies: list = [{}]):
""" Handle page creation and js rendering. Internal use for render/arender methods. """
try:
page = await self.browser.newPage()

# Wait before rendering the page, to prevent timeouts.
await asyncio.sleep(wait)

if cookies:
for cookie in cookies:
if cookie:
await page.setCookie(cookie)

# Load the given page (GET request, obviously.)
if reload:
await page.goto(url, options={'timeout': int(timeout * 1000)})
Expand Down Expand Up @@ -538,7 +544,61 @@ async def _async_render(self, *, url: str, script: str = None, scrolldown, sleep
page = None
return None

def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False):
def _convert_cookiejar_to_render(self, session_cookiejar):
"""
Convert HTMLSession.cookies:cookiejar[] for browser.newPage().setCookie
"""
# | setCookie(self, *cookies:dict) -> None
# | Set cookies.
# |
# | ``cookies`` should be dictionaries which contain these fields:
# |
# | * ``name`` (str): **required**
# | * ``value`` (str): **required**
# | * ``url`` (str)
# | * ``domain`` (str)
# | * ``path`` (str)
# | * ``expires`` (number): Unix time in seconds
# | * ``httpOnly`` (bool)
# | * ``secure`` (bool)
# | * ``sameSite`` (str): ``'Strict'`` or ``'Lax'``
cookie_render = {}
def __convert(cookiejar, key):
try:
v = eval ("cookiejar."+key)
if not v: kv = ''
else: kv = {key: v}
except:
kv = ''
return kv

keys = [
'name',
'value',
'url',
'domain',
'path',
'sameSite',
'expires',
'httpOnly',
'secure',
]
for key in keys:
cookie_render.update(__convert(session_cookiejar, key))
return cookie_render

def _convert_cookiesjar_to_render(self):
"""
Convert HTMLSession.cookies for browser.newPage().setCookie
Return a list of dict
"""
cookies_render = []
if isinstance(self.session.cookies, http.cookiejar.CookieJar):
for cookie in self.session.cookies:
cookies_render.append(self._convert_cookiejar_to_render(cookie))
return cookies_render

def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False, cookies: list = [{}], send_cookies_session: bool = False):
"""Reloads the response in Chromium, and replaces HTML content
with an updated version, with JavaScript executed.
Expand All @@ -550,6 +610,9 @@ def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scroll
:param reload: If ``False``, content will not be loaded from the browser, but will be provided from memory.
:param keep_page: If ``True`` will allow you to interact with the browser page through ``r.html.page``.
:param send_cookies_session: If ``True`` send ``HTMLSession.cookies`` convert.
:param cookies: If not ``empty`` send ``cookies``.
If ``scrolldown`` is specified, the page will scrolldown the specified
number of times, after sleeping the specified amount of time
(e.g. ``scrolldown=10, sleep=1``).
Expand Down Expand Up @@ -590,12 +653,14 @@ def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scroll
if self.url == DEFAULT_URL:
reload = False

if send_cookies_session:
cookies = self._convert_cookiesjar_to_render()

for i in range(retries):
if not content:
try:

content, result, page = self.session.loop.run_until_complete(self._async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page))
content, result, page = self.session.loop.run_until_complete(self._async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page, cookies=cookies))
except TypeError:
pass
else:
Expand All @@ -609,7 +674,7 @@ def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scroll
self.page = page
return result

async def arender(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False):
async def arender(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False, cookies: list = [{}], send_cookies_session: bool = False):
""" Async version of render. Takes same parameters. """

self.browser = await self.session.browser
Expand All @@ -619,11 +684,14 @@ async def arender(self, retries: int = 8, script: str = None, wait: float = 0.2,
if self.url == DEFAULT_URL:
reload = False

if send_cookies_session:
cookies = self._convert_cookiesjar_to_render()

for _ in range(retries):
if not content:
try:

content, result, page = await self._async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page)
content, result, page = await self._async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page, cookies=cookies)
except TypeError:
pass
else:
Expand Down

0 comments on commit 420fe61

Please sign in to comment.