You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
test_smallfile_workload[4-5000-22-5-33-CephBlockPool] is failing with error smallfile-client-1-benchmark-bbfd9296-sg7bb Failed to run - (Failed)
#10798
Open
pintojoy opened this issue
Nov 4, 2024
· 0 comments
Args:
file_size (int) : the size of the file to be used
files (int) : number of files to use
threads (int) : number of threads to be used in the test
samples (int) : how many samples to run for each test
interface (str) : the volume type (rbd / cephfs)
"""
if config.PERF.get("deploy_internal_es"):
self.es = ElasticSearch()
else:
if config.PERF.get("internal_es_server") == "":
self.es = None
return
else:
url = (
f"{config.PERF.get('internal_es_scheme')}://{config.PERF.get('internal_es_server')}"
f":{config.PERF.get('internal_es_port')}",
)
self.es = {
"server": config.PERF.get("internal_es_server"),
"port": config.PERF.get("internal_es_port"),
"scheme": config.PERF.get("internal_es_scheme"),
"url": url,
}
# verify that the connection to the elasticsearch server is OK
if not super(TestSmallFileWorkload, self).es_connect():
self.es = None
return
# deploy the benchmark-operator
self.deploy_benchmark_operator()
# verify that there is an elasticsearch server for the benchmark
if not self.es:
log.error("This test must have an Elasticsearch server")
return False
# Getting the full path for the test logs
self.full_log_path = get_full_test_logs_path(cname=self)
self.results_path = get_full_test_logs_path(cname=self)
self.full_log_path += (
f"-{file_size}-{files}-{threads}-{samples}-{clients}-{interface}"
)
log.info(f"Logs file path name is : {self.full_log_path}")
# Loading the main template yaml file for the benchmark
log.info("Create resource file for small_files workload")
self.crd_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)
# Saving the Original elastic-search IP and PORT - if defined in yaml
self.es_info_backup(self.es)
self.set_storageclass(interface=interface)
# Setting the data set to 40% of the total storage capacity
self.setting_storage_usage(file_size, files, threads, samples, clients)
self.get_env_info()
def wait_for_wl_to_finish(self, timeout=18000, sleep=300):
"""
Waiting until the workload is finished and get the test log
Args:
timeout (int): time in seconds to wait for the benchmark to finish
sleep (int): sleep interval in seconds
Raises:
Exception : too many restarts of the test.
ResourceWrongStatusException : test Failed / Error
TimeoutExpiredError : test did not complete on time.
"""
log.info(f"Waiting for {self.client_pod_name} to complete")
Finished = 0
restarts = 0
total_time = timeout
while not Finished and total_time > 0:
results = run_oc_command(
"get pod --no-headers -o custom-columns=:metadata.name,:status.phase",
namespace=benchmark_operator.BMO_NAME,
)
(fname, status) = ["", ""]
for name in results:
# looking for the pod which runs the benchmark (not the IO)
# this pod contains `client` in its name, and there is only one
# pod like this; the other pods have `server` in their names.
(fname, status) = name.split()
if re.search("client", fname):
break
else:
(fname, status) = ["", ""]
if fname == "": # there is no `client` pod !
err_msg = f"{self.client_pod} Failed to run !!!"
log.error(err_msg)
raise Exception(err_msg)
if not fname == self.client_pod:
# The client pod name is different from previous check, it was restarted
log.info(
f"The pod {self.client_pod} was restart. the new client pod is {fname}"
)
self.client_pod = fname
restarts += 1
# in case of restarting the benchmark, reset the timeout as well
total_time = timeout
if restarts > 3: # we are tolerating only 3 restarts
err_msg = f"Too much restarts of the benchmark ({restarts})"
log.error(err_msg)
raise Exception(err_msg)
if status == "Succeeded":
# Getting the end time of the benchmark - for reporting.
self.end_time = self.get_time()
self.test_logs = self.pod_obj.exec_oc_cmd(
f"logs {self.client_pod}", out_yaml_format=False
)
log.info(f"{self.client_pod} completed successfully")
Finished = 1
elif (
status != constants.STATUS_RUNNING
and status != constants.STATUS_PENDING
):
# if the benchmark pod is not in Running state (and not Completed/Pending),
# no need to wait for timeout.
# Note: the pod can be in pending state in case of restart.
err_msg = f"{self.client_pod} Failed to run - ({status})"
log.error(err_msg)
raise exceptions.ResourceWrongStatusException(
self.client_pod,
describe_out=err_msg,
column="Status",
expected="Succeeded",
got=status,
)
E ocs_ci.ocs.exceptions.ResourceWrongStatusException: Resource smallfile-client-1-benchmark-bbfd9296-sg7bb in column Status was in state Failed but expected Succeeded describe output: smallfile-client-1-benchmark-bbfd9296-sg7bb Failed to run - (Failed)
The text was updated successfully, but these errors were encountered:
self = <test_small_file_workload.TestSmallFileWorkload object at 0x7f243c4f0910>
file_size = 4, files = 5000, threads = 22, samples = 5, clients = 33
interface = 'CephBlockPool'
@pytest.mark.parametrize(
argnames=["file_size", "files", "threads", "samples", "clients", "interface"],
argvalues=[
pytest.param([4, 5000, 22, 5, 33, constants.CEPHBLOCKPOOL]),
pytest.param([16, 5000, 8, 5, 21, constants.CEPHBLOCKPOOL]),
pytest.param([4, 2500, 4, 5, 9, constants.CEPHFILESYSTEM]),
pytest.param([16, 1500, 4, 5, 9, constants.CEPHFILESYSTEM]),
],
)
@pytest.mark.polarion_id("OCS-1295")
def test_smallfile_workload(
self, file_size, files, threads, samples, clients, interface
):
"""
Run SmallFile Workload
if not self.run():
tests/cross_functional/performance/io_workload/test_small_file_workload.py:625:
tests/cross_functional/performance/io_workload/test_small_file_workload.py:518: in run
self.wait_for_wl_to_finish(sleep=30)
self = <test_small_file_workload.TestSmallFileWorkload object at 0x7f243c4f0910>
timeout = 18000, sleep = 30
def wait_for_wl_to_finish(self, timeout=18000, sleep=300):
"""
Waiting until the workload is finished and get the test log
E ocs_ci.ocs.exceptions.ResourceWrongStatusException: Resource smallfile-client-1-benchmark-bbfd9296-sg7bb in column Status was in state Failed but expected Succeeded describe output: smallfile-client-1-benchmark-bbfd9296-sg7bb Failed to run - (Failed)
The text was updated successfully, but these errors were encountered: