diff --git a/docs/usage/session.rst b/docs/usage/session.rst
index 8e3aad61..da633187 100644
--- a/docs/usage/session.rst
+++ b/docs/usage/session.rst
@@ -321,7 +321,8 @@ Overriding session configs
 For spiders that target a single website, using settings and request metadata
 keys for :ref:`session initialization ` and :ref:`session checking ` should
 do the job. However, for broad-crawl spiders,
-:doc:`multi-website spiders `, or for code
+:doc:`multi-website spiders `, spiders that modify
+session-using requests based on session initialization responses, or for code
 reusability purposes, you might want to define different session configs for
 different websites.
 
@@ -353,6 +354,10 @@ to tell whether a request is a :ref:`session initialization request
 
 .. autofunction:: scrapy_zyte_api.is_session_init_request
 
+To get the session ID of a given request, use:
+
+.. autofunction:: scrapy_zyte_api.get_request_session_id
+
 Classes decorated with :func:`~scrapy_zyte_api.session_config` are registered
 into :data:`~scrapy_zyte_api.session_config_registry`:
 
diff --git a/scrapy_zyte_api/__init__.py b/scrapy_zyte_api/__init__.py
index ff25d9a1..443f19ef 100644
--- a/scrapy_zyte_api/__init__.py
+++ b/scrapy_zyte_api/__init__.py
@@ -21,6 +21,7 @@
     LocationSessionConfig,
     ScrapyZyteAPISessionDownloaderMiddleware,
     SessionConfig,
+    get_request_session_id,
     is_session_init_request,
     session_config,
 )
diff --git a/scrapy_zyte_api/_session.py b/scrapy_zyte_api/_session.py
index 5d403440..7b51ccb6 100644
--- a/scrapy_zyte_api/_session.py
+++ b/scrapy_zyte_api/_session.py
@@ -25,6 +25,25 @@
 ZYTE_API_META_KEYS = ("zyte_api", "zyte_api_automap", "zyte_api_provider")
 
 
+def get_request_session_id(request: Request) -> Optional[str]:
+    """Return the session ID of *request*, or ``None`` if it does not have a
+    session ID assigned."""
+    for meta_key in ZYTE_API_META_KEYS:
+        if meta_key not in request.meta:
+            continue
+        session_id = request.meta[meta_key].get("session", {}).get("id", None)
+        if session_id:
+            return session_id
+    logger.warning(
+        f"Request {request} unexpectedly had no session ID assigned. "
+        f"If you are sure this issue is not caused by your own code, "
+        f"please report this at "
+        f"https://github.com/scrapy-plugins/scrapy-zyte-api/issues/new "
+        f"providing a minimal, reproducible example."
+    )
+    return None
+
+
 def is_session_init_request(request):
     """Return ``True`` if the request is a :ref:`session initialization
     request ` or ``False`` otherwise."""
@@ -209,6 +228,61 @@ def enabled(self, request: Request) -> bool:
         """
         return request.meta.get("zyte_api_session_enabled", self._enabled)
 
+    def process_request(self, request: Request) -> Optional[Request]:
+        """Process *request* after it has been assigned a session.
+
+        Return ``None`` to send the request as is, or return a new request
+        object to replace the original request.
+
+        The default implementation does not modify the request.
+
+        You can combine this method and :meth:`check` to modify requests based
+        on session initialization responses. For example:
+
+        #. In :meth:`__init__`, create a dictionary to store session data:
+
+           .. code-block:: python
+
+               def __init__(self, crawler):
+                   super().__init__(crawler)
+                   self.session_data = {}
+
+        #. In :meth:`check`, store data from the session initialization
+           response in ``session_data``:
+
+           .. code-block:: python
+
+               def check(self, response: Response, request: Request) -> bool:
+                   if scrapy_zyte_api.is_session_init_request(request):
+                       session_id = scrapy_zyte_api.get_request_session_id(request)
+                       self.session_data[session_id] = {
+                           "csrf_token": response.css(".csrf-token::text").get(),
+                       }
+                   return super().check(response, request)
+
+        #. In :meth:`process_request`, read the session data and act
+           accordingly, either modifying the request in place where possible,
+           e.g.:
+
+           .. code-block:: python
+
+               def process_request(self, request: Request) -> Optional[Request]:
+                   session_id = scrapy_zyte_api.get_request_session_id(request)
+                   csrf_token = self.session_data[session_id]["csrf_token"]
+                   request.headers["CSRF-Token"] = csrf_token
+
+           or returning an entirely new request, e.g.:
+
+           .. code-block:: python
+
+               def process_request(self, request: Request) -> Optional[Request]:
+                   session_id = scrapy_zyte_api.get_request_session_id(request)
+                   csrf_token = self.session_data[session_id]["csrf_token"]
+                   new_url = w3lib.url.add_or_replace_parameter(request.url, "csrf_token", csrf_token)
+                   return request.replace(url=new_url)
+        """
+        return None
+
     def pool(self, request: Request) -> str:
         """Return the ID of the session pool to use for *request*.
 
@@ -766,22 +840,6 @@ def is_init_request(self, request: Request) -> bool:
         """
         return request.meta.get(SESSION_INIT_META_KEY, False)
 
-    def _get_request_session_id(self, request: Request) -> Optional[str]:
-        for meta_key in ZYTE_API_META_KEYS:
-            if meta_key not in request.meta:
-                continue
-            session_id = request.meta[meta_key].get("session", {}).get("id", None)
-            if session_id:
-                return session_id
-        logger.warning(
-            f"Request {request} had no session ID assigned, unexpectedly. "
-            f"If you are sure this issue is not caused by your own code, "
-            f"please report this at "
-            f"https://github.com/scrapy-plugins/scrapy-zyte-api/issues/new "
-            f"providing a minimal, reproducible example."
-        )
-        return None
-
     def _start_session_refresh(self, session_id: str, request: Request, pool: str):
         try:
             self._pools[pool].remove(session_id)
@@ -799,11 +857,20 @@ def _start_session_refresh(self, session_id: str, request: Request, pool: str):
             pass
 
     def _start_request_session_refresh(self, request: Request, pool: str):
-        session_id = self._get_request_session_id(request)
+        session_id = get_request_session_id(request)
         if session_id is None:
             return
        self._start_session_refresh(session_id, request, pool)
 
+    @staticmethod
+    def allow_new_session_assignments(request):
+        # Since a response has been received or an exception raised, allow new
+        # session assignments for this request, e.g. if a new request based on
+        # this one (e.g. request.replace()) is returned by the
+        # process_response or process_exception methods of a later downloader
+        # middleware.
+        request.meta.pop("_zyte_api_session_assigned", None)
+
     async def check(self, response: Response, request: Request) -> bool:
         """Check the response for signs of session expiration, update the
         internal session pool accordingly, and return ``False`` if the session
@@ -838,16 +905,23 @@ async def check(self, response: Response, request: Request) -> bool:
             self._start_request_session_refresh(request, pool)
             return False
 
-    async def assign(self, request: Request):
-        """Assign a working session to *request*."""
+    async def assign(self, request: Request) -> Optional[Request]:
+        """Assign a working session to *request*.
+
+        If the session config creates a new request instead of modifying the
+        request in place, return that new request to replace the received
+        one.
+        """
         assert self._crawler.stats
         with self._fatal_error_handler:
-            if self.is_init_request(request):
-                return
+            if self.is_init_request(request) or request.meta.get(
+                "_zyte_api_session_assigned", False
+            ):
+                return None
             session_config = self._get_session_config(request)
             if not session_config.enabled(request):
                 self._crawler.stats.inc_value("scrapy-zyte-api/sessions/use/disabled")
-                return
+                return None
             session_id = await self._next(request)
             # Note: If there is a session set already (e.g. a request being
             # retried), it is overridden.
@@ -870,6 +944,13 @@
                 request.meta[meta_key] = {}
             request.meta[meta_key]["session"] = {"id": session_id}
             request.meta.setdefault("dont_merge_cookies", True)
+            # Mark this request as having a session assigned already, so that
+            # if a later downloader middleware process_request call returns a
+            # new request object (with a shallow copy of its meta), a new call
+            # to the process_request method of the session management
+            # middleware does not assign a new session again.
+            request.meta.setdefault("_zyte_api_session_assigned", True)
+            return session_config.process_request(request)
 
     def is_enabled(self, request: Request) -> bool:
         session_config = self._get_session_config(request)
@@ -882,7 +963,7 @@ def handle_error(self, request: Request):
         self._crawler.stats.inc_value(
             f"scrapy-zyte-api/sessions/pools/{pool}/use/failed"
         )
-        session_id = self._get_request_session_id(request)
+        session_id = get_request_session_id(request)
         if session_id is not None:
             self._errors[session_id] += 1
             if self._errors[session_id] < self._max_errors:
@@ -909,14 +990,19 @@ def __init__(self, crawler: Crawler):
         self._crawler = crawler
         self._sessions = _SessionManager(crawler)
 
-    async def process_request(self, request: Request, spider: Spider) -> None:
-        await self._sessions.assign(request)
+    async def process_request(
+        self, request: Request, spider: Spider
+    ) -> Optional[Request]:
+        return await self._sessions.assign(request)
 
     async def process_response(
         self, request: Request, response: Response, spider: Spider
     ) -> Union[Request, Response, None]:
         if isinstance(response, DummyResponse):
             return response
+
+        self._sessions.allow_new_session_assignments(request)
+
         passed = await self._sessions.check(response, request)
         if not passed:
             new_request_or_none = get_retry_request(
@@ -939,6 +1025,8 @@ async def process_exception(
         ):
             return None
 
+        self._sessions.allow_new_session_assignments(request)
+
         if exception.parsed.type == "/problem/session-expired":
             self._sessions.handle_expiration(request)
             reason = "session_expired"
diff --git a/tests/test_providers.py b/tests/test_providers.py
index e9ac9ab4..be6dce81 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -1,4 +1,5 @@
 from collections import defaultdict
+from copy import copy
 from typing import Annotated
 
 import pytest
@@ -10,7 +11,6 @@ from scrapy import Request, Spider
 from scrapy_poet import DummyResponse
 from scrapy_poet.utils.testing import HtmlResource, crawl_single_item
-from scrapy_poet.utils.testing import create_scrapy_settings as _create_scrapy_settings
 from twisted.internet import reactor
 from twisted.web.client import Agent, readBody
 from web_poet import (
@@ -52,16 +52,6 @@
 PROVIDER_PARAMS = {"geolocation": "IE"}
 
 
-def create_scrapy_settings():
-    settings = _create_scrapy_settings()
-    for setting, value in SETTINGS.items():
- if setting.endswith("_MIDDLEWARES") and settings[setting]: - settings[setting].update(value) - else: - settings[setting] = value - return settings - - @attrs.define class ProductPage(BasePage): html: BrowserHtml @@ -108,7 +98,7 @@ def parse_(self, response: DummyResponse, page: ProductPage): @ensureDeferred async def test_provider(mockserver): - settings = create_scrapy_settings() + settings = copy(SETTINGS) settings["ZYTE_API_URL"] = mockserver.urljoin("/") settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} item, url, _ = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) @@ -155,7 +145,7 @@ def parse_( # type: ignore[override] port = get_ephemeral_port() handle_urls(f"{fresh_mockserver.host}:{port}")(MyPage) - settings = create_scrapy_settings() + settings = copy(SETTINGS) settings["ZYTE_API_URL"] = fresh_mockserver.urljoin("/") settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 1100} item, url, _ = await crawl_single_item( @@ -182,7 +172,7 @@ def parse_(self, response: DummyResponse, product: Product, my_item: MyItem): # port = get_ephemeral_port() handle_urls(f"{fresh_mockserver.host}:{port}")(MyPage) - settings = create_scrapy_settings() + settings = copy(SETTINGS) settings["ZYTE_API_URL"] = fresh_mockserver.urljoin("/") settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 1100} item, url, _ = await crawl_single_item( @@ -210,7 +200,7 @@ def parse_(self, response: DummyResponse, product: Product, browser_response: Br port = get_ephemeral_port() handle_urls(f"{fresh_mockserver.host}:{port}")(MyPage) - settings = create_scrapy_settings() + settings = copy(SETTINGS) settings["ZYTE_API_URL"] = fresh_mockserver.urljoin("/") settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 1} item, url, _ = await crawl_single_item( @@ -228,7 +218,7 @@ def parse_(self, response: DummyResponse, product: Product, browser_response: Br @ensureDeferred async def test_provider_params_setting(mockserver): - settings = create_scrapy_settings() + settings = copy(SETTINGS) settings["ZYTE_API_URL"] = mockserver.urljoin("/") settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} settings["ZYTE_API_PROVIDER_PARAMS"] = PROVIDER_PARAMS @@ -239,7 +229,7 @@ async def test_provider_params_setting(mockserver): @ensureDeferred async def test_provider_params_meta(mockserver): - settings = create_scrapy_settings() + settings = copy(SETTINGS) settings["ZYTE_API_URL"] = mockserver.urljoin("/") settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} _, _, crawler = await crawl_single_item( @@ -251,7 +241,7 @@ async def test_provider_params_meta(mockserver): @ensureDeferred async def test_provider_params_remove_unused_options(mockserver): - settings = create_scrapy_settings() + settings = copy(SETTINGS) settings["ZYTE_API_URL"] = mockserver.urljoin("/") settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} settings["ZYTE_API_PROVIDER_PARAMS"] = { @@ -281,7 +271,7 @@ def parse_(self, response: DummyResponse, page: AnnotatedProductPage): # type: "product2": page.product, } - settings = create_scrapy_settings() + settings = copy(SETTINGS) settings["ZYTE_API_URL"] = mockserver.urljoin("/") settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} @@ -311,7 +301,7 @@ def parse_(self, response: DummyResponse, page: AnnotatedProductPage): # type: "product": page.product, } - settings = create_scrapy_settings() + settings = copy(SETTINGS) settings["ZYTE_API_URL"] = mockserver.urljoin("/") settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} @@ -332,7 +322,7 @@ def parse_(self, response: DummyResponse, 
@@ -332,7 +322,7 @@ def parse_(self, response: DummyResponse, page: AnnotatedProductPage):  # type:
                 "product": page.product,
             }
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     settings["ZYTE_API_PROVIDER_PARAMS"] = {
@@ -365,7 +355,7 @@ def parse_(self, response: DummyResponse, page: GeoProductPage):  # type: ignore
                 "product": page.product,
             }
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
@@ -384,7 +374,7 @@ class GeoZyteAPISpider(ZyteAPISpider):
         def parse_(self, response: DummyResponse, page: GeoProductPage):  # type: ignore[override]
             pass
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
@@ -422,7 +412,7 @@ def parse_(self, response: DummyResponse, page: CustomAttrsPage):  # type: ignor
                 "custom_attrs": page.custom_attrs,
             }
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
@@ -465,7 +455,7 @@ def parse_(self, response: DummyResponse, page: CustomAttrsPage):  # type: ignor
                 "custom_attrs": page.custom_attrs,
             }
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
@@ -500,7 +490,7 @@ def _log_request(self, params):
 
 
 def provider_settings(server):
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["ZYTE_API_URL"] = server.urljoin("/")
     settings["ZYTE_API_TRANSPARENT_MODE"] = True
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 1100}
@@ -1122,7 +1112,7 @@ def parse_(self, response: DummyResponse, page: ActionProductPage):  # type: ign
                 "action_results": page.actions,
             }
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
@@ -1160,7 +1150,7 @@ def start_requests(self):
         def parse(self, response: DummyResponse, product: Product):
             pass
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
     _, _, crawler = await crawl_single_item(TestSpider, HtmlResource, settings)
@@ -1215,7 +1205,7 @@ def start_requests(self):
         def parse(self, response: DummyResponse, product: Product):
             pass
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["STATS_CLASS"] = OnlyOnceStatsCollector
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     settings["ZYTE_API_AUTO_FIELD_STATS"] = True
@@ -1264,7 +1254,7 @@ def start_requests(self):
         def parse(self, response: DummyResponse, product: Product):
             pass
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     settings["ZYTE_API_AUTO_FIELD_STATS"] = True
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
@@ -1416,7 +1406,7 @@ def start_requests(self):
         def parse(self, response: DummyResponse, product: Product):
             pass
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     settings["ZYTE_API_AUTO_FIELD_STATS"] = True
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
@@ -1453,7 +1443,7 @@ def parse(self, response: DummyResponse, product: Product):
             product.name = "foo"
             yield product
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     settings["ZYTE_API_AUTO_FIELD_STATS"] = True
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
@@ -1504,7 +1494,7 @@ def start_requests(self):
         def parse(self, response: DummyResponse, page: MyProductPage):
             pass
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     settings["ZYTE_API_AUTO_FIELD_STATS"] = True
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
@@ -1570,7 +1560,7 @@ def start_requests(self):
         def parse(self, response: DummyResponse, page: AltProductPage):
             pass
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     settings["ZYTE_API_AUTO_FIELD_STATS"] = True
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
@@ -1621,7 +1611,7 @@ def start_requests(self):
         def parse(self, response: DummyResponse, product: Product):
             pass
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     settings["ZYTE_API_AUTO_FIELD_STATS"] = True
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
@@ -1666,7 +1656,7 @@ def start_requests(self):
         def parse(self, response: DummyResponse, product: Product):
             pass
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     settings["ZYTE_API_AUTO_FIELD_STATS"] = True
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
@@ -1712,7 +1702,7 @@ def start_requests(self):
         def parse(self, response: DummyResponse, product: Product):
             pass
 
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     settings["ZYTE_API_AUTO_FIELD_STATS"] = True
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
@@ -1755,7 +1745,7 @@ def parse_(
 
 @ensureDeferred
 async def test_multiple_types(mockserver):
-    settings = create_scrapy_settings()
+    settings = copy(SETTINGS)
     settings["ZYTE_API_URL"] = mockserver.urljoin("/")
     settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
     item, url, _ = await crawl_single_item(
diff --git a/tests/test_sessions.py b/tests/test_sessions.py
index cd5e401b..c607c820 100644
--- a/tests/test_sessions.py
+++ b/tests/test_sessions.py
@@ -1,7 +1,7 @@
 from collections import deque
 from copy import copy, deepcopy
 from math import floor
-from typing import Any, Dict, Tuple, Union
+from typing import Any, Dict, Optional, Tuple, Union
 from unittest.mock import patch
 
 import pytest
@@ -19,6 +19,7 @@
     SESSION_DEFAULT_RETRY_POLICY,
     LocationSessionConfig,
     SessionConfig,
+    get_request_session_id,
     is_session_init_request,
     session_config,
 )
@@ -2081,6 +2082,123 @@ class CustomSessionConfig(SessionConfig):
         pass
 
 
+@ensureDeferred
+async def test_session_config_process_request_change_request(mockserver):
+    pytest.importorskip("web_poet")
+
+    @session_config("example.com")
+    class CustomSessionConfig(SessionConfig):
+
+        def __init__(self, crawler):
+            super().__init__(crawler)
+            self.session_data = {}
+
+        def check(self, response: Response, request: Request) -> bool:
+            if is_session_init_request(request):
+                session_id = get_request_session_id(request)
+                self.session_data[session_id] = {"foo": "bar"}
+            return super().check(response, request)
+
+        def process_request(self, request: Request) -> Optional[Request]:
+            session_id = get_request_session_id(request)
self.session_data[session_id]["foo"] + request.headers["foo"] = foo + + settings = { + "RETRY_TIMES": 0, + "ZYTE_API_URL": mockserver.urljoin("/"), + "ZYTE_API_SESSION_ENABLED": True, + "ZYTE_API_SESSION_MAX_BAD_INITS": 1, + } + request_headers = [] + + class TestSpider(Spider): + name = "test" + start_urls = ["https://example.com"] + + def parse(self, response): + request_headers.append(response.request.headers["foo"]) + + crawler = await get_crawler(settings, spider_cls=TestSpider, setup_engine=False) + await crawler.crawl() + + assert request_headers == [b"bar"] + + session_stats = { + k: v + for k, v in crawler.stats.get_stats().items() + if k.startswith("scrapy-zyte-api/sessions") + } + assert session_stats == { + "scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 1, + "scrapy-zyte-api/sessions/pools/example.com/use/check-passed": 1, + } + + # Clean up the session config registry, and check it, otherwise we could + # affect other tests. + + session_config_registry.__init__() # type: ignore[misc] + + +@ensureDeferred +async def test_session_config_process_request_new_request(mockserver): + pytest.importorskip("web_poet") + + @session_config("example.com") + class CustomSessionConfig(SessionConfig): + + def __init__(self, crawler): + super().__init__(crawler) + self.session_data = {} + + def check(self, response: Response, request: Request) -> bool: + if is_session_init_request(request): + session_id = get_request_session_id(request) + self.session_data[session_id] = {"foo": "bar"} + return super().check(response, request) + + def process_request(self, request: Request) -> Optional[Request]: + session_id = get_request_session_id(request) + foo = self.session_data[session_id]["foo"] + new_url = request.url.rstrip("/") + f"/{foo}" + return request.replace(url=new_url) + + settings = { + "RETRY_TIMES": 0, + "ZYTE_API_URL": mockserver.urljoin("/"), + "ZYTE_API_SESSION_ENABLED": True, + "ZYTE_API_SESSION_MAX_BAD_INITS": 1, + } + output_urls = [] + + class TestSpider(Spider): + name = "test" + start_urls = ["https://example.com"] + + def parse(self, response): + output_urls.append(response.url) + + crawler = await get_crawler(settings, spider_cls=TestSpider, setup_engine=False) + await crawler.crawl() + + assert output_urls == ["https://example.com/bar"] + + session_stats = { + k: v + for k, v in crawler.stats.get_stats().items() + if k.startswith("scrapy-zyte-api/sessions") + } + assert session_stats == { + "scrapy-zyte-api/sessions/pools/example.com/init/check-passed": 1, + "scrapy-zyte-api/sessions/pools/example.com/use/check-passed": 1, + } + + # Clean up the session config registry, and check it, otherwise we could + # affect other tests. + + session_config_registry.__init__() # type: ignore[misc] + + @ensureDeferred async def test_location_session_config(mockserver): pytest.importorskip("web_poet")