
Commit 9175fec
rebased to main
1 parent 706f8bf

File tree: 4 files changed, +20 −27 lines

scrapyd_k8s/api.py (−4)

@@ -56,10 +56,6 @@ def api_schedule():
     _version = request.form.get('_version', 'latest') # TODO allow customizing latest tag
     # any other parameter is passed as spider argument
     args = { k: v for k, v in request.form.items() if k not in ('project', 'spider', 'setting', 'jobid', 'priority', '_version') }
-    # running_jobs = launcher.get_running_jobs_count()
-    # start_suspended = running_jobs >= k8s_scheduler.max_proc
-    # logger.info(
-    #     f"Scheduling job {job_id} with start_suspended={start_suspended}. Running jobs: {running_jobs}, Max procs: {k8s_scheduler.max_proc}")
     env_config, env_secret = project.env_config(), project.env_secret()
     jobid = launcher.schedule(project, _version, spider, job_id, settings, args)
     return { 'status': 'ok', 'jobid': job_id }
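
Note: the handler above reads everything from request.form, so scheduling is a plain form POST to schedule.json, and any field outside the reserved list ends up in args and is forwarded to the spider. A hedged example with requests follows; the URL, port and project/spider names are placeholders, not values from this diff:

    # Hypothetical client call against a running scrapyd-k8s instance.
    # Host/port and project/spider names are made-up examples.
    import requests

    resp = requests.post(
        'http://localhost:6800/schedule.json',
        data={
            'project': 'example-project',
            'spider': 'example-spider',
            '_version': 'latest',   # optional; api_schedule() defaults to 'latest'
            'some_arg': 'value',    # any extra form field is passed through as a spider argument
        },
    )
    print(resp.json())  # expected shape per the handler: {'status': 'ok', 'jobid': '...'}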

scrapyd_k8s/joblogs/log_handler_k8s.py (+6 −7)

@@ -266,15 +266,14 @@ def stream_logs(self, job_id, pod_name):
                 self.concatenate_and_delete_files(log_file_path, temp_file_path)
             else:
                 os.remove(temp_file_path)
-                logger.info(f"Removed temporary file '{temp_file_path}' after streaming logs for job '{job_name}'.")
+                logger.info(f"Removed temporary file '{temp_file_path}' after streaming logs for job '{job_id}'.")
         except (IOError, OSError) as e:
-            logger.error(f"I/O error while streaming logs for job '{job_name}': {e}")
-            raise PodStreamingError(f"I/O error while streaming logs for job '{job_name}': {e}") from e
+            logger.error(f"I/O error while streaming logs for job '{job_id}': {e}")
+            raise PodStreamingError(f"I/O error while streaming logs for job '{job_id}': {e}") from e
         except KubernetesJobLogHandlerError as e:
-            logger.error(f"Error processing logs for job '{job_name}': {e}")
-            raise PodStreamingError(f"Error processing logs for job '{job_name}': {e}") from e
-        finally:
-            w.stop()
+            logger.error(f"Error processing logs for job '{job_id}': {e}")
+            raise PodStreamingError(f"Error processing logs for job '{job_id}': {e}") from e
+

     def handle_events(self, event):
         """
         Watches Kubernetes pod events and handles actions such as starting log streaming or uploading logs.

scrapyd_k8s/k8s_scheduler/k8s_scheduler.py (+3 −1)

@@ -4,6 +4,7 @@
 from kubernetes.client import ApiException
 
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
 
 class KubernetesScheduler:
     """
@@ -35,6 +36,7 @@ def __init__(self, config, launcher, max_proc):
                 raise TypeError(f"max_proc must be an integer, got {type(max_proc).__name__}")
             self.max_proc = max_proc
             self.namespace = config.namespace()
+            logger.info("Scheduler feature is initialized")
         except TypeError as e:
             logger.exception(f"TypeError during KubernetesScheduler initialization: {e}")
             raise
@@ -82,7 +84,7 @@ def handle_pod_event(self, event):
             return
 
         # If a pod has terminated (Succeeded or Failed), we may have capacity to unsuspend jobs
-        if pod_phase in ('Succeeded', 'Failed') and event_type in ('MODIFIED', 'DELETED'):
+        if pod_phase in ('Succeeded', 'Failed'):
             logger.info(f"Pod {pod_name} has completed with phase {pod_phase}. Checking for suspended jobs.")
             self.check_and_unsuspend_jobs()
         else:
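
To make the behavioural change in handle_pod_event concrete, here is a small self-contained illustration (the event fields are simplified; real events come from the Kubernetes watch API). With the old condition, a completed pod reported through an ADDED event, which can happen when a watch reconnects and relists existing pods, would never trigger check_and_unsuspend_jobs; the phase-only check introduced here does.

    # Illustration only: compare the old and new gate for a completed pod
    # delivered as an ADDED watch event (e.g. after a watch reconnect).
    event_type, pod_phase = 'ADDED', 'Succeeded'

    old_gate = pod_phase in ('Succeeded', 'Failed') and event_type in ('MODIFIED', 'DELETED')
    new_gate = pod_phase in ('Succeeded', 'Failed')

    print(old_gate)  # False -> suspended jobs would stay suspended
    print(new_gate)  # True  -> check_and_unsuspend_jobs() would run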

scrapyd_k8s/launcher/k8s.py (+11 −15)

@@ -18,6 +18,7 @@
 from scrapyd_k8s.joblogs import KubernetesJobLogHandler
 
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
 
 class K8s:
 
@@ -27,6 +28,7 @@ class K8s:
 
     def __init__(self, config):
         self._namespace = config.scrapyd().get('namespace', 'default')
+        self.max_proc = config.scrapyd().get('max_proc')
         self._pull_secret = config.scrapyd().get('pull_secret')
         # TODO figure out where to put Kubernetes initialisation
         try:
@@ -37,9 +39,8 @@ def __init__(self, config):
         self._k8s = kubernetes.client.CoreV1Api()
         self._k8s_batch = kubernetes.client.BatchV1Api()
 
-        self.scheduler = None
+        self.k8s_scheduler = None
         self._init_resource_watcher(config)
-        self.max_proc = int(config.scrapyd().get('max_proc', 4))
 
     def _init_resource_watcher(self, config):
         self.resource_watcher = ResourceWatcher(self._namespace, config)
@@ -50,12 +51,7 @@ def _init_resource_watcher(self, config):
         if self.max_proc is not None:
             self.enable_k8s_scheduler(config)
         else:
-            logger.debug("k8s scheduler not enabled; 'max_proc' configuration is missing in the scrapyd section.")
-
-        # Initialize KubernetesScheduler
-        self.max_proc = int(config.scrapyd().get('max_proc', 4))
-        self.k8s_scheduler = KubernetesScheduler(config, self, self.resource_watcher, self.max_proc)
-        logger.debug(f"KubernetesLauncher initialized with max_proc={self.max_proc}.")
+            logger.debug("k8s scheduler not enabled; jobs run directly after scheduling.")
 
     def get_node_name(self):
         deployment = os.getenv('MY_DEPLOYMENT_NAME', 'default')
@@ -71,11 +67,11 @@ def listjobs(self, project=None):
         )
 
     def schedule(self, project, version, spider, job_id, settings, args):
-        if self.scheduler:
+        if self.k8s_scheduler:
             running_jobs = self.get_running_jobs_count()
-            start_suspended = running_jobs >= self.scheduler.max_proc
+            start_suspended = running_jobs >= self.k8s_scheduler.max_proc
             logger.debug(
-                f"Scheduling job {job_id} with start_suspended={start_suspended}. Running jobs: {running_jobs}, Max procs: {self.scheduler.max_proc}")
+                f"Scheduling job {job_id} with start_suspended={start_suspended}. Running jobs: {running_jobs}, Max procs: {self.k8s_scheduler.max_proc}")
         else:
             start_suspended = False
             logger.debug(f"Scheduling job {job_id} without suspension. Scheduler not enabled.")
@@ -175,16 +171,16 @@ def enable_joblogs(self, config):
     def enable_k8s_scheduler(self, config):
         try:
             max_proc = int(self.max_proc)
-            self.scheduler = KubernetesScheduler(config, self, max_proc)
+            self.k8s_scheduler = KubernetesScheduler(config, self, max_proc)
             logger.debug(f"KubernetesLauncher initialized with max_proc={max_proc}.")
-            self.resource_watcher.subscribe(self.scheduler.handle_pod_event)
+            self.resource_watcher.subscribe(self.k8s_scheduler.handle_pod_event)
             logger.info("K8s scheduler started.")
         except ValueError:
             logger.error(f"Invalid max_proc value: {self.max_proc}. Scheduler not enabled.")
-            self.scheduler = None
+            self.k8s_scheduler = None
 
     def unsuspend_job(self, job_id: str):
-        if not self.scheduler:
+        if not self.k8s_scheduler:
             logger.error("Scheduler is not enabled. Cannot unsuspend jobs.")
             return False
         job_name = self._get_job_name(job_id)
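
The unsuspend_job path above resolves a Job name for a job id and releases the Job from its suspended state. Its exact body is not part of this diff; as an illustration of the underlying mechanism, a Kubernetes Job created with spec.suspend=True can be released by patching that field with the official client, roughly like this (function and variable names are placeholders):

    # Sketch, not the project's code: release a suspended Kubernetes Job by
    # patching spec.suspend to False. Assumes kubernetes.config has already
    # been loaded, as done in K8s.__init__.
    import kubernetes
    from kubernetes.client import ApiException

    def unsuspend_job_sketch(job_name: str, namespace: str = 'default') -> bool:
        batch = kubernetes.client.BatchV1Api()
        try:
            batch.patch_namespaced_job(
                name=job_name,
                namespace=namespace,
                body={'spec': {'suspend': False}},
            )
            return True
        except ApiException as e:
            # Same exception type the scheduler module imports above.
            print(f"Failed to unsuspend job {job_name}: {e}")
            return False

This is also why max_proc can act as a concurrency cap: jobs scheduled with start_suspended=True stay parked in a suspended Job until a pod completes and check_and_unsuspend_jobs frees capacity.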
