|
| 1 | +from asyncio.subprocess import Process as AsyncioProcess |
| 2 | +from unittest import IsolatedAsyncioTestCase |
| 3 | +from unittest.mock import MagicMock, patch |
| 4 | + |
| 5 | +import pytest |
| 6 | +from playwright.async_api import PlaywrightContextManager |
| 7 | +from scrapy.exceptions import NotConfigured |
| 8 | +from scrapy.extensions.memusage import MemoryUsage |
| 9 | + |
| 10 | +from scrapy_playwright.memusage import ScrapyPlaywrightMemoryUsageExtension |
| 11 | +from scrapy_playwright.handler import ScrapyPlaywrightDownloadHandler |
| 12 | + |
| 13 | + |
# Fake process id assigned to the mocked download handler of each URL scheme.
# Insertion order matters: tests compare against ``list(SCHEMA_PID_MAP.values())``.
SCHEMA_PID_MAP = {
    "http": 123,
    "https": 456,
}
| 15 | + |
| 16 | + |
def mock_crawler_with_handlers() -> MagicMock:
    """Build a mocked crawler exposing one mocked Playwright handler per scheme.

    For every ``(scheme, pid)`` pair in ``SCHEMA_PID_MAP`` a
    ``MagicMock(spec=ScrapyPlaywrightDownloadHandler)`` is created whose
    ``playwright_context_manager._connection._transport._proc`` is a mock
    process reporting that ``pid``.

    Returns:
        A ``MagicMock`` crawler whose ``engine.downloader.handlers._handlers``
        is the ``{scheme: handler}`` dict described above.
    """
    handlers = {}
    for schema, pid in SCHEMA_PID_MAP.items():
        process = MagicMock()
        process.pid = pid
        handlers[schema] = MagicMock(spec=ScrapyPlaywrightDownloadHandler)
        handlers[schema].playwright_context_manager._connection._transport._proc = process
    crawler = MagicMock()
    crawler.engine.downloader.handlers._handlers = handlers
    # The crawler mock itself is returned (not the handlers dict), hence the
    # ``MagicMock`` return annotation.
    return crawler
| 27 | + |
| 28 | + |
def raise_import_error(*args, **kwargs):
    """Raise ``ImportError`` unconditionally, ignoring every argument.

    Accepts any call signature so it can be plugged in as a mock
    ``side_effect``.
    """
    raise ImportError()
| 31 | + |
| 32 | + |
class MockMemoryInfo:
    """Minimal stand-in for a memory-info result, exposing only ``rss``."""

    # Fixed resident set size reported by every instance.
    rss = 999
| 35 | + |
| 36 | + |
@patch("scrapy.extensions.memusage.MailSender")
class TestMemoryUsageExtension(IsolatedAsyncioTestCase):
    """Tests for ``ScrapyPlaywrightMemoryUsageExtension``.

    The class-level ``patch`` replaces ``scrapy.extensions.memusage.MailSender``
    for every test method and passes the resulting mock as the last positional
    argument (``_MailSender``), which the tests do not use directly.
    """

    async def test_process_availability(self, _MailSender):
        """The main node process should be accessible from the context manager"""
        ctx_manager = PlaywrightContextManager()
        await ctx_manager.start()
        try:
            assert isinstance(ctx_manager._connection._transport._proc, AsyncioProcess)
        finally:
            # Always shut the started driver down, even when the assertion
            # fails, so a failing test does not leave it running.
            await ctx_manager.__aexit__()

    @patch("scrapy_playwright.memusage.import_module", side_effect=raise_import_error)
    async def test_psutil_not_available_extension_disabled(self, _import_module, _MailSender):
        """Instantiation raises ``NotConfigured`` when ``import_module`` fails."""
        crawler = MagicMock()
        with pytest.raises(NotConfigured):
            ScrapyPlaywrightMemoryUsageExtension(crawler)

    async def test_get_process_ids_ok(self, _MailSender):
        """The pids of all mocked handlers are returned, in handler order."""
        crawler = mock_crawler_with_handlers()
        extension = ScrapyPlaywrightMemoryUsageExtension(crawler)
        assert extension._get_main_process_ids() == list(SCHEMA_PID_MAP.values())

    async def test_get_process_ids_error(self, _MailSender):
        """An empty list is returned when reading the handlers raises."""
        crawler = mock_crawler_with_handlers()
        # Replace the handlers dict with a mock whose ``values()`` call raises.
        crawler.engine.downloader.handlers._handlers = MagicMock()
        crawler.engine.downloader.handlers._handlers.values.side_effect = raise_import_error
        extension = ScrapyPlaywrightMemoryUsageExtension(crawler)
        assert extension._get_main_process_ids() == []

    async def test_get_descendant_processes(self, _MailSender):
        """Descendants of a process tree are returned as a flat list."""
        # Tree under test: p1 -> p2 -> (p3, p4).
        p1 = MagicMock()
        p2 = MagicMock()
        p3 = MagicMock()
        p4 = MagicMock()
        p2.children.return_value = [p3, p4]
        p1.children.return_value = [p2]
        crawler = MagicMock()
        extension = ScrapyPlaywrightMemoryUsageExtension(crawler)
        assert extension._get_descendant_processes(p1) == [p2, p3, p4]

    async def test_get_total_process_size(self, _MailSender):
        """The total equals the mocked ``rss`` times the number of main pids."""
        crawler = MagicMock()
        extension = ScrapyPlaywrightMemoryUsageExtension(crawler)
        extension.psutil = MagicMock()
        # Every mocked ``psutil.Process(...)`` reports the same fixed rss.
        extension.psutil.Process.return_value.memory_info.return_value = MockMemoryInfo()
        extension._get_main_process_ids = MagicMock(return_value=[1, 2, 3])
        expected_size = MockMemoryInfo().rss * len(extension._get_main_process_ids())
        assert extension._get_total_playwright_process_memory() == expected_size

    async def test_get_virtual_size_sum(self, _MailSender):
        """``get_virtual_size`` is the parent class value plus the Playwright total."""
        crawler = MagicMock()
        extension = ScrapyPlaywrightMemoryUsageExtension(crawler)
        parent_cls_extension = MemoryUsage(crawler)
        extension._get_total_playwright_process_memory = MagicMock(return_value=123)
        assert extension.get_virtual_size() == parent_cls_extension.get_virtual_size() + 123
0 commit comments