|
2 | 2 | import logging
|
3 | 3 | import platform
|
4 | 4 | import threading
|
5 |
| -from typing import Awaitable, Iterator, Optional, Tuple, Union |
| 5 | +from typing import Awaitable, Dict, Iterator, Optional, Tuple, Union |
6 | 6 |
|
7 | 7 | import scrapy
|
8 | 8 | from playwright.async_api import Error, Page, Request, Response
|
@@ -103,68 +103,58 @@ async def _get_header_value(
|
103 | 103 | return None
|
104 | 104 |
|
105 | 105 |
|
106 |
| -if platform.system() == "Windows": |
107 |
| - |
108 |
| - class _ThreadedLoopAdapter: |
109 |
| - """Utility class to start an asyncio event loop in a new thread and redirect coroutines. |
110 |
| - This allows to run Playwright in a different loop than the Scrapy crawler, allowing to |
111 |
| - use ProactorEventLoop which is supported by Playwright on Windows. |
112 |
| - """ |
113 |
| - |
114 |
| - _loop: asyncio.AbstractEventLoop |
115 |
| - _thread: threading.Thread |
116 |
| - _coro_queue: asyncio.Queue = asyncio.Queue() |
117 |
| - _stop_event: asyncio.Event = asyncio.Event() |
118 |
| - |
119 |
| - @classmethod |
120 |
| - async def _handle_coro(cls, coro, future) -> None: |
121 |
| - try: |
122 |
| - future.set_result(await coro) |
123 |
| - except Exception as exc: |
124 |
| - future.set_exception(exc) |
125 |
| - |
126 |
| - @classmethod |
127 |
| - async def _process_queue(cls) -> None: |
128 |
| - while not cls._stop_event.is_set(): |
129 |
| - coro, future = await cls._coro_queue.get() |
130 |
| - asyncio.create_task(cls._handle_coro(coro, future)) |
131 |
| - cls._coro_queue.task_done() |
132 |
| - |
133 |
| - @classmethod |
134 |
| - def _deferred_from_coro(cls, coro) -> Deferred: |
135 |
| - future: asyncio.Future = asyncio.Future() |
136 |
| - asyncio.run_coroutine_threadsafe(cls._coro_queue.put((coro, future)), cls._loop) |
137 |
| - return scrapy.utils.defer.deferred_from_coro(future) |
138 |
| - |
139 |
| - @classmethod |
140 |
| - def start(cls) -> None: |
141 |
| - policy = asyncio.WindowsProactorEventLoopPolicy() # type: ignore[attr-defined] |
| 106 | +class _ThreadedLoopAdapter: |
| 107 | + """Utility class to start an asyncio event loop in a new thread and redirect coroutines. |
| 108 | + This allows to run Playwright in a different loop than the Scrapy crawler, allowing to |
| 109 | + use ProactorEventLoop which is supported by Playwright on Windows. |
| 110 | + """ |
| 111 | + |
| 112 | + _loop: asyncio.AbstractEventLoop |
| 113 | + _thread: threading.Thread |
| 114 | + _coro_queue: asyncio.Queue = asyncio.Queue() |
| 115 | + _stop_events: Dict[int, asyncio.Event] = {} |
| 116 | + |
| 117 | + @classmethod |
| 118 | + async def _handle_coro(cls, coro, future) -> None: |
| 119 | + try: |
| 120 | + future.set_result(await coro) |
| 121 | + except Exception as exc: |
| 122 | + future.set_exception(exc) |
| 123 | + |
| 124 | + @classmethod |
| 125 | + async def _process_queue(cls) -> None: |
| 126 | + while any(not ev.is_set() for ev in cls._stop_events.values()): |
| 127 | + coro, future = await cls._coro_queue.get() |
| 128 | + asyncio.create_task(cls._handle_coro(coro, future)) |
| 129 | + cls._coro_queue.task_done() |
| 130 | + |
| 131 | + @classmethod |
| 132 | + def _deferred_from_coro(cls, coro) -> Deferred: |
| 133 | + future: asyncio.Future = asyncio.Future() |
| 134 | + asyncio.run_coroutine_threadsafe(cls._coro_queue.put((coro, future)), cls._loop) |
| 135 | + return scrapy.utils.defer.deferred_from_coro(future) |
| 136 | + |
| 137 | + @classmethod |
| 138 | + def start(cls, caller_id: int) -> None: |
| 139 | + cls._stop_events[caller_id] = asyncio.Event() |
| 140 | + if not getattr(cls, "_loop", None): |
| 141 | + policy = asyncio.DefaultEventLoopPolicy() |
| 142 | + if platform.system() == "Windows": |
| 143 | + policy = asyncio.WindowsProactorEventLoopPolicy() # type: ignore[attr-defined] |
142 | 144 | cls._loop = policy.new_event_loop()
|
143 | 145 | asyncio.set_event_loop(cls._loop)
|
144 | 146 |
|
| 147 | + if not getattr(cls, "_thread", None): |
145 | 148 | cls._thread = threading.Thread(target=cls._loop.run_forever, daemon=True)
|
146 | 149 | cls._thread.start()
|
147 | 150 | logger.info("Started loop on separate thread: %s", cls._loop)
|
148 |
| - |
149 | 151 | asyncio.run_coroutine_threadsafe(cls._process_queue(), cls._loop)
|
150 | 152 |
|
151 |
| - @classmethod |
152 |
| - def stop(cls) -> None: |
153 |
| - cls._stop_event.set() |
| 153 | + @classmethod |
| 154 | + def stop(cls, caller_id: int) -> None: |
| 155 | + """Wait until all handlers are closed to stop the event loop and join the thread.""" |
| 156 | + cls._stop_events[caller_id].set() |
| 157 | + if all(ev.is_set() for ev in cls._stop_events.values()): |
154 | 158 | asyncio.run_coroutine_threadsafe(cls._coro_queue.join(), cls._loop)
|
155 | 159 | cls._loop.call_soon_threadsafe(cls._loop.stop)
|
156 | 160 | cls._thread.join()
|
157 |
| - |
158 |
| - _deferred_from_coro = _ThreadedLoopAdapter._deferred_from_coro |
159 |
| -else: |
160 |
| - |
161 |
| - class _ThreadedLoopAdapter: # type: ignore[no-redef] |
162 |
| - @classmethod |
163 |
| - def start(cls) -> None: |
164 |
| - pass |
165 |
| - |
166 |
| - @classmethod |
167 |
| - def stop(cls) -> None: |
168 |
| - pass |
169 |
| - |
170 |
| - _deferred_from_coro = scrapy.utils.defer.deferred_from_coro |
0 commit comments