Skip to content

Commit 1b4def9

Browse files
committed
Housekeeping
1 parent d8e41f1 commit 1b4def9

File tree

7 files changed

+93
-89
lines changed

7 files changed

+93
-89
lines changed

docs/changelog.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# scrapy-playwright changelog
22

3+
### [v0.0.27](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.27) (2023-07-24)
4+
5+
* Override method only for navigation requests (#177)
6+
* Pass spider argument to _create_browser_context (#212)
7+
* await AsyncPlaywright.stop on close (#214)
8+
9+
310
### [v0.0.26](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.26) (2023-02-01)
411

512
* Fix logging (pass extra args instead of updating log record factory)

tests/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from contextlib import asynccontextmanager
22

3+
from scrapy import Request
4+
from scrapy.http.response.html import HtmlResponse
35
from scrapy.utils.test import get_crawler
46

57

@@ -19,3 +21,11 @@ async def make_handler(settings_dict: dict):
1921
yield handler
2022
finally:
2123
await handler._close()
24+
25+
26+
def assert_correct_response(response: HtmlResponse, request: Request) -> None:
27+
assert isinstance(response, HtmlResponse)
28+
assert response.request is request
29+
assert response.url == request.url
30+
assert response.status == 200
31+
assert "playwright" in response.flags

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def _is_coroutine(obj):
2121
return asyncio.iscoroutinefunction(obj) or inspect.isgeneratorfunction(obj)
2222

2323

24-
@pytest.mark.tryfirst
24+
@pytest.hookimpl(tryfirst=True)
2525
def pytest_pycollect_makeitem(collector, name, obj):
2626
"""A pytest hook to collect asyncio coroutines."""
2727
if collector.funcnamefilter(name) and _is_coroutine(obj):

tests/test_encoding.py

Lines changed: 0 additions & 62 deletions
This file was deleted.

tests/test_page_methods.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from scrapy_playwright.page import PageMethod
1111

12-
from tests import make_handler
12+
from tests import make_handler, assert_correct_response
1313
from tests.mockserver import StaticMockServer
1414

1515

@@ -32,14 +32,6 @@ async def test_page_methods():
3232
assert str(screenshot) == "<PageMethod for method 'screenshot'>"
3333

3434

35-
def assert_correct_response(response: HtmlResponse, request: Request) -> None:
36-
assert isinstance(response, HtmlResponse)
37-
assert response.request is request
38-
assert response.url == request.url
39-
assert response.status == 200
40-
assert "playwright" in response.flags
41-
42-
4335
class MixinPageMethodTestCase:
4436
@pytest.mark.asyncio
4537
async def test_page_non_page_method(self, caplog):
@@ -139,11 +131,7 @@ async def test_page_method_infinite_scroll(self):
139131
)
140132
resp = await handler._download_request(req, Spider("foo"))
141133

142-
assert isinstance(resp, HtmlResponse)
143-
assert resp.request is req
144-
assert resp.url == server.urljoin("/scroll.html")
145-
assert resp.status == 200
146-
assert "playwright" in resp.flags
134+
assert_correct_response(resp, req)
147135
assert len(resp.css("div.quote")) == 30
148136

149137
@pytest.mark.asyncio

tests/test_playwright_requests.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@
1313
TimeoutError as PlaywrightTimeoutError,
1414
)
1515
from scrapy import Spider, Request, FormRequest
16-
from scrapy.http import Response, HtmlResponse
16+
from scrapy.http import Response
1717

1818
from scrapy_playwright.handler import DEFAULT_CONTEXT_NAME
1919
from scrapy_playwright.page import PageMethod
2020

21-
from tests import make_handler
21+
from tests import make_handler, assert_correct_response
2222
from tests.mockserver import MockServer, StaticMockServer
2323

2424

@@ -44,11 +44,7 @@ async def test_basic_response(self):
4444
req = Request(server.urljoin("/index.html"), meta=meta)
4545
resp = await handler._download_request(req, Spider("foo"))
4646

47-
assert isinstance(resp, HtmlResponse)
48-
assert resp.request is req
49-
assert resp.url == req.url
50-
assert resp.status == 200
51-
assert "playwright" in resp.flags
47+
assert_correct_response(resp, req)
5248
assert resp.css("a::text").getall() == ["Lorem Ipsum", "Infinite Scroll"]
5349
assert isinstance(resp.meta["playwright_page"], PlaywrightPage)
5450
assert resp.meta["playwright_page"].url == resp.url
@@ -64,10 +60,7 @@ async def test_post_request(self):
6460
)
6561
resp = await handler._download_request(req, Spider("foo"))
6662

67-
assert resp.request is req
68-
assert resp.url == req.url
69-
assert resp.status == 200
70-
assert "playwright" in resp.flags
63+
assert_correct_response(resp, req)
7164
assert "Request body: foo=bar" in resp.text
7265

7366
@pytest.mark.asyncio
@@ -166,6 +159,7 @@ async def test_route_continue_exception(self, logger):
166159
route = MagicMock()
167160
playwright_request = AsyncMock()
168161
playwright_request.url = scrapy_request.url
162+
playwright_request.method = scrapy_request.method
169163
playwright_request.is_navigation_request = MagicMock(return_value=True)
170164
playwright_request.all_headers.return_value = {}
171165

tests/test_utils.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@
44
import pytest
55
from playwright.async_api import Error as PlaywrightError
66
from scrapy import Spider
7-
from scrapy_playwright._utils import _get_page_content, _NAVIGATION_ERROR_MSG
7+
from scrapy.http.headers import Headers
8+
from scrapy_playwright._utils import _get_page_content, _NAVIGATION_ERROR_MSG, _encode_body
9+
10+
11+
# page content retrieval
12+
# ======================
813

914

1015
@pytest.mark.skipif(sys.version_info < (3, 8), reason="AsyncMock was added on Python 3.8")
@@ -67,3 +72,65 @@ async def test_get_page_content_reraise_unknown_exception():
6772
scrapy_request_url="https://example.org",
6873
scrapy_request_method="GET",
6974
)
75+
76+
77+
# body encoding
78+
# =============
79+
80+
81+
def body_str(charset: str, content: str = "áéíóú") -> str:
82+
return f"""
83+
<!doctype html>
84+
<html>
85+
<head>
86+
<meta charset="{charset}">
87+
</head>
88+
<body>
89+
<p>{content}</p>
90+
</body>
91+
</html>
92+
""".strip()
93+
94+
95+
@pytest.mark.asyncio
96+
async def test_encode_from_headers():
97+
"""Charset declared in headers takes precedence"""
98+
text = body_str(charset="gb2312")
99+
body, encoding = _encode_body(
100+
headers=Headers({"content-type": "text/html; charset=ISO-8859-1"}),
101+
text=text,
102+
)
103+
assert encoding == "cp1252"
104+
assert body == text.encode(encoding)
105+
106+
107+
@pytest.mark.asyncio
108+
async def test_encode_from_body():
109+
"""No charset declared in headers, use the one declared in the body"""
110+
text = body_str(charset="gb2312")
111+
body, encoding = _encode_body(headers=Headers({}), text=text)
112+
assert encoding == "gb18030"
113+
assert body == text.encode(encoding)
114+
115+
116+
@pytest.mark.asyncio
117+
async def test_encode_fallback_utf8():
118+
"""No charset declared, use utf-8 as fallback"""
119+
text = "<html>áéíóú</html>"
120+
body, encoding = _encode_body(headers=Headers(), text=text)
121+
assert encoding == "utf-8"
122+
assert body == text.encode(encoding)
123+
124+
125+
@pytest.mark.asyncio
126+
async def test_encode_mismatch():
127+
"""Charsets declared in the headers and in the body do not match, and the
128+
one from the headers fails to encode the text: use the one from the body (the first one that works)
129+
"""
130+
text = body_str(charset="gb2312", content="空手道")
131+
body, encoding = _encode_body(
132+
headers=Headers({"content-type": "text/html; charset=ISO-8859-1"}),
133+
text=text,
134+
)
135+
assert encoding == "gb18030"
136+
assert body == text.encode(encoding)

0 commit comments

Comments
 (0)