Commit 0d0ff63

Merge pull request #4522 from pdhiran/3az-update
Addition of site, host & OSD down scenarios for 3AZ clusters
2 parents 96b0461 + 9808bdf

5 files changed: +943 -11 lines

ceph/rados/core_workflows.py

+16 -9
@@ -602,7 +602,10 @@ def check_health_warning(self, warning: str):
         status_report = self.run_ceph_command(cmd="ceph report", client_exec=True)
         ceph_health_status = list(status_report["health"]["checks"].keys())
         if warning in ceph_health_status:
-            log.info(f"warning: {warning} present on the cluster")
+            log.info(
+                f"warning: {warning} present on the cluster"
+                f"all Generated warnings : {ceph_health_status}"
+            )
             log.info(
                 f"Warning: {warning} generated on the cluster : {ceph_health_status}"
             )
@@ -725,9 +728,13 @@ def collect_osd_daemon_ids(self, osd_node) -> list:
         """
         The method is used to collect the various OSD daemons present on a particular node
         :param osd_node: name of the OSD node on which osd daemon details are collected (ceph.ceph.CephNode): ceph node
+            or host-name in string can also be sent
         :return: list of OSD ID's
         """
-        cmd = f"sudo ceph osd ls-tree {osd_node.hostname}"
+        if isinstance(osd_node, str):
+            cmd = f"sudo ceph osd ls-tree {osd_node}"
+        else:
+            cmd = f"sudo ceph osd ls-tree {osd_node.hostname}"
         return self.run_ceph_command(cmd=cmd)

     def enable_balancer(self, **kwargs) -> bool:
@@ -2673,7 +2680,7 @@ def list_orch_services(self, service_type=None, export=None) -> list:
         if orch_ls_op:
             return [service["service_name"] for service in orch_ls_op]

-    def check_host_status(self, hostname, status: str = None) -> bool:
+    def check_host_status(self, hostname, status: str) -> bool:
         """
         Checks the status of host(offline or online) using
         ceph orch host ls and return boolean
@@ -2685,13 +2692,13 @@ def check_host_status(self, hostname, status: str = None) -> bool:
         """
         host_cmd = f"ceph orch host ls --host_pattern {hostname}"
         out = self.run_ceph_command(cmd=host_cmd, client_exec=True)
-        host_status = out[0]["status"].lower().strip()
-        log.info(f"Status of the host is {host_status}")
-        if status:
+        if out:
+            host_status = out[0]["status"].lower().strip()
+            log.info("Status of the host : %s is %s", hostname, host_status)
             return True if status.lower() == host_status else False
-        elif "offline" in host_status:
+        else:
+            log.info("Host : %s is not part of the cluster", hostname)
             return False
-        return True

     def run_concurrent_io(self, pool_name: str, obj_name: str, obj_size: int):
         """
@@ -4524,7 +4531,7 @@ def get_osd_list(self, status: str) -> list:
                 osd_dict[key].append(entry["id"])
             log.info(f"List of {key} OSDs: {osd_dict[key]}")

-        return osd_dict[status]
+        return osd_dict[status.lower()]

     def get_osd_details(self, osd_id: int):
         """

ceph/rados/pool_workflows.py

+1 -1
@@ -231,7 +231,7 @@ def do_rados_put(
             put_cmd = (
                 f"rados put -p {pool} obj{i} {infile}"
                 if obj_name is None
-                else f"rados put -p {pool} {obj_name} {infile}"
+                else f"rados put -p {pool} {obj_name}-{i} {infile}"
             )
             if offset:
                 put_cmd = f"{put_cmd} --offset {offset}"
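
This one-line change is behavioural: when obj_name is supplied, every loop iteration previously reissued `rados put` against the same object, overwriting it; each iteration now writes a distinct `{obj_name}-{i}` object. A standalone sketch of the command construction (pool and file names are illustrative):

    # Reproduces the put_cmd f-string from do_rados_put outside the class.
    pool, obj_name, infile = "test_pool", "big_obj", "/tmp/sample_obj"
    for i in range(3):
        put_cmd = (
            f"rados put -p {pool} obj{i} {infile}"
            if obj_name is None
            else f"rados put -p {pool} {obj_name}-{i} {infile}"
        )
        print(put_cmd)
    # Output:
    # rados put -p test_pool big_obj-0 /tmp/sample_obj
    # rados put -p test_pool big_obj-1 /tmp/sample_obj
    # rados put -p test_pool big_obj-2 /tmp/sample_obj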

suites/squid/rados/tier-3_rados_test-3-AZ-Cluster.yaml

+24
@@ -209,6 +209,30 @@ tests:
         - test-pool-12
       desc: Enable 3 AZ stretch mode on new pools of the cluster post upgrade

+  - test:
+      name: Site-down Scenarios
+      module: test_stretch_n_az_site_down_scenarios.py
+      polarion-id: CEPH-83609869
+      config:
+        pool_name: test_stretch_pool6
+        stretch_bucket: datacenter
+        rule_name: 3az_rule
+      desc: Test stretch site down scenarios in 3 AZ cluster
+
+  - test:
+      name: Maintenance mode Scenarios
+      module: test_stretch_n_az_site_down_scenarios.py
+      polarion-id: CEPH-83609871
+      config:
+        pool_name: test_stretch_pool7
+        stretch_bucket: datacenter
+        rule_name: 3az_rule
+        scenarios_to_run:
+          - scenario-8
+          - scenario-9
+          - scenario-10
+      desc: Test stretch site maintenance mode scenarios in 3 AZ cluster
+
   - test:
       name: Netsplit Scenarios data-data sites
       module: test_stretch_n-az_netsplit_scenarios.py
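
Only the "Maintenance mode Scenarios" entry passes scenarios_to_run, so the shared module needs a sensible default when the key is absent. A sketch of how such a config key can be consumed (the default list and dispatch are assumptions, not code from test_stretch_n_az_site_down_scenarios.py):

    import logging

    log = logging.getLogger(__name__)

    def run(ceph_cluster, **kw) -> int:
        """Hypothetical cephci-style entry point honouring scenarios_to_run."""
        config = kw.get("config", {})
        # Without the key (the Site-down test above), run every scenario.
        scenarios_to_run = config.get(
            "scenarios_to_run", [f"scenario-{i}" for i in range(1, 11)]
        )
        for scenario in scenarios_to_run:
            log.info("Executing %s", scenario)
            # ... dispatch to the matching scenario implementation here ...
        return 0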

tests/rados/test_pool_osd_recovery.py

+3 -1
@@ -120,7 +120,9 @@ def run(ceph_cluster, **kw) -> int:
     while datetime.datetime.now() < timeout_time:
         try:
             for node in osd_nodes:
-                assert rados_obj.check_host_status(hostname=node.hostname)
+                assert rados_obj.check_host_status(
+                    hostname=node.hostname, status="offline"
+                )
             log.info("Rebooted hosts are up")
             break
         except AssertionError:
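
The updated assertion slots into the poll-until-timeout idiom this test already uses: assert inside a try block, swallow AssertionError, retry until the deadline. The same pattern in isolation (the helper name and timings are illustrative, not from this commit):

    import datetime
    import time

    def wait_until(check_fn, timeout_sec=120, interval_sec=10) -> bool:
        """Retry check_fn until it returns truthy or the deadline passes."""
        deadline = datetime.datetime.now() + datetime.timedelta(seconds=timeout_sec)
        while datetime.datetime.now() < deadline:
            try:
                assert check_fn()
                return True
            except AssertionError:
                time.sleep(interval_sec)
        return False

    # e.g. wait_until(lambda: rados_obj.check_host_status(
    #     hostname="node1-3az", status="offline"))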
