 from ceph.ceph import CommandFailed
 from tests.cephfs.cephfs_utilsV1 import FsUtils as FsUtilsv1
 from tests.cephfs.cephfs_volume_management import wait_for_process
+from tests.cephfs.lib.cephfs_common_lib import CephFSCommonUtils
 from tests.cephfs.snapshot_clone.cephfs_cg_io import CG_snap_IO
 from tests.cephfs.snapshot_clone.cg_snap_utils import CG_Snap_Utils
 from utility.log import Log
@@ -106,12 +107,14 @@ def run(ceph_cluster, **kw):
     Run QS IO validation tool on selected quiesce set
     1.Run multiple parallel quiesce calls to same set
     2.Create snapshots when quiesced, wait for sometime and release quiesce
-    Clean Up:
+
+    Clean Up: Umount mountpoints, destroy subvolumes and subvolumegroup

     """
     try:
         test_data = kw.get("test_data")
         fs_util_v1 = FsUtilsv1(ceph_cluster, test_data=test_data)
+        cephfs_common_utils = CephFSCommonUtils(ceph_cluster)
         erasure = (
             FsUtilsv1.get_custom_config_value(test_data, "erasure")
             if test_data
@@ -237,22 +240,17 @@ def run(ceph_cluster, **kw):
         crash_status_before = fs_util_v1.get_crash_ls_new(client1)

         log.info(f"Crash status before Test: {crash_status_before}")
-        end_time = datetime.datetime.now() + datetime.timedelta(seconds=300)
-        ceph_healthy = 0
-        while (datetime.datetime.now() < end_time) and (ceph_healthy == 0):
-            try:
-                fs_util_v1.get_ceph_health_status(client1)
-                ceph_healthy = 1
-            except Exception as ex:
-                log.info(ex)
-                time.sleep(5)
-        if ceph_healthy == 0:
-            assert False, "Ceph Cluster remains unhealthy even after 5mins"
+        wait_time_secs = 300
+        if cephfs_common_utils.wait_for_healthy_ceph(client1, wait_time_secs):
+            assert (
+                False
+            ), f"Cluster health is not OK even after waiting for {wait_time_secs} secs"
         cg_test_params = {
             "ceph_cluster": ceph_cluster,
             "fs_name": default_fs,
             "fs_util": fs_util_v1,
             "cg_snap_util": cg_snap_util,
+            "cephfs_common_utils": cephfs_common_utils,
             "cg_snap_io": cg_snap_io,
             "clients": qs_clients,
             "mgr_node": mgr_node,
@@ -305,11 +303,7 @@ def run(ceph_cluster, **kw):
     finally:
         log.info("Clean Up in progess")
         wait_time_secs = 300
-        if wait_for_healthy_ceph(client1, fs_util_v1, wait_time_secs) == 0:
-            client1.exec_command(
-                sudo=True,
-                cmd="ceph fs status;ceph status -s;ceph health detail",
-            )
+        if cephfs_common_utils.wait_for_healthy_ceph(client1, wait_time_secs):
             assert (
                 False
             ), f"Cluster health is not OK even after waiting for {wait_time_secs} secs"
@@ -2143,6 +2137,7 @@ def cg_snap_interop_1(cg_test_params):
     qs_clients = [client, client1]
     qs_sets = cg_test_params["qs_sets"]
     cg_snap_util = cg_test_params["cg_snap_util"]
+    cephfs_common_utils = cg_test_params["cephfs_common_utils"]
     cg_snap_io = cg_test_params["cg_snap_io"]
     fs_util = cg_test_params["fs_util"]

@@ -2164,13 +2159,13 @@ def cg_snap_interop_1(cg_test_params):
     cmd += '"'
     log.info("Adding 7 MDS to cluster")
     out, rc = client.exec_command(sudo=True, cmd=cmd)
-    if wait_for_healthy_ceph(client1, fs_util, 300) == 0:
+    if cephfs_common_utils.wait_for_healthy_ceph(client, 300):
         return 1
     client.exec_command(
         sudo=True,
         cmd=f"ceph fs set {fs_name} max_mds 4",
     )
-    if wait_for_healthy_ceph(client1, fs_util, 300) == 0:
+    if cephfs_common_utils.wait_for_healthy_ceph(client, 300):
         return 1
     test_fail = 0
     qs_set = random.choice(qs_sets)
@@ -2256,7 +2251,7 @@ def cg_snap_interop_1(cg_test_params):
         )
         time.sleep(10)

-        if wait_for_healthy_ceph(client1, fs_util, 300) == 0:
+        if cephfs_common_utils.wait_for_healthy_ceph(client, 300):
             log.error("Ceph cluster is not healthy after MDS failover")
             return 1
         log.info("Verify quiesce lifecycle can suceed after mds failover")
@@ -2302,7 +2297,7 @@ def cg_snap_interop_1(cg_test_params):

         log.info("MDS failover when quiesced: quiesce state is CANCELED")
         time.sleep(10)
-        if wait_for_healthy_ceph(client1, fs_util, 300) == 0:
+        if cephfs_common_utils.wait_for_healthy_ceph(client, 300):
             log.error("Ceph cluster is not healthy after MDS failover")
             return 1
         log.info("Verify quiesce lifecycle can suceed after mds failover")
@@ -2345,7 +2340,7 @@ def cg_snap_interop_1(cg_test_params):
             log.error(f"qs set {qs_id_val} not reached RELEASED state")

         time.sleep(10)
-        if wait_for_healthy_ceph(client1, fs_util, 300) == 0:
+        if cephfs_common_utils.wait_for_healthy_ceph(client, 300):
             log.error("Ceph cluster is not healthy after MDS failover")
             return 1
         log.info("Verify quiesce lifecycle can suceed after mds failover")
@@ -2389,13 +2384,9 @@ def cg_snap_interop_1(cg_test_params):
         sudo=True,
         cmd=f"ceph fs set {fs_name} max_mds 2",
     )
-    if wait_for_healthy_ceph(client1, fs_util, 300) == 0:
+    if cephfs_common_utils.wait_for_healthy_ceph(client1, 300):
         log.error("Ceph cluster is not healthy after max_mds set to 2")
         test_fail += 1
-    client.exec_command(
-        sudo=True,
-        cmd=f"ceph fs status {fs_name};ceph status -s;ceph health detail",
-    )
     if cg_test_io_status.value == 1:
         log.error(
             f"CG IO test exits with failure during quiesce test on qs_set-{qs_id_val}"
@@ -3071,23 +3062,3 @@ def wait_for_two_active_mds(client1, fs_name, max_wait_time=180, retry_interval=
             time.sleep(retry_interval)  # Retry after the specified interval

     return False
-
-
-def wait_for_healthy_ceph(client1, fs_util, wait_time_secs):
-    # Returns 1 if healthy, 0 if unhealthy
-    ceph_healthy = 0
-    end_time = datetime.datetime.now() + datetime.timedelta(seconds=wait_time_secs)
-    while ceph_healthy == 0 and (datetime.datetime.now() < end_time):
-        try:
-            fs_util.get_ceph_health_status(client1)
-            ceph_healthy = 1
-        except Exception as ex:
-            log.info(ex)
-            log.info(
-                f"Wait for sometime to check if Cluster health can be OK, current state : {ex}"
-            )
-            time.sleep(5)
-
-    if ceph_healthy == 0:
-        return 0
-    return 1
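
Note on the new helper: this change drops the module-level wait_for_healthy_ceph (deleted above) in favour of CephFSCommonUtils.wait_for_healthy_ceph from tests/cephfs/lib/cephfs_common_lib.py, whose implementation is not part of this diff. Judging by the call sites (`if cephfs_common_utils.wait_for_healthy_ceph(client, 300): return 1`), its return convention is inverted relative to the old helper: truthy when the cluster stays unhealthy, falsy once health is OK. The sketch below only illustrates that assumed behaviour; the class name and constructor come from the diff, while the FsUtils subclassing, the default timeout, and the log messages are assumptions, not the library's actual code.

# Illustrative sketch only -- assumes CephFSCommonUtils extends FsUtils so it can
# reuse get_ceph_health_status(); the real helper in cephfs_common_lib.py may differ.
import datetime
import time

from tests.cephfs.cephfs_utilsV1 import FsUtils
from utility.log import Log

log = Log(__name__)


class CephFSCommonUtils(FsUtils):
    def __init__(self, ceph_cluster):
        self.ceph_cluster = ceph_cluster
        super().__init__(ceph_cluster)

    def wait_for_healthy_ceph(self, client, wait_time_secs=300):
        """Poll cluster health until HEALTH_OK or timeout.

        Returns 0 once the cluster is healthy, 1 if it is still unhealthy
        after wait_time_secs, so callers can write
        "if wait_for_healthy_ceph(...): <fail>".
        """
        end_time = datetime.datetime.now() + datetime.timedelta(seconds=wait_time_secs)
        while datetime.datetime.now() < end_time:
            try:
                # get_ceph_health_status raises when health is not OK
                self.get_ceph_health_status(client)
                return 0
            except Exception as ex:
                log.info(f"Waiting for cluster health to be OK, current state: {ex}")
                time.sleep(5)
        log.error(f"Cluster health is not OK even after {wait_time_secs} secs")
        return 1

Returning a failure flag instead of asserting keeps the helper reusable both in run(), where the caller asserts, and in cg_snap_interop_1, where the caller returns 1 or bumps test_fail.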