test_smallfile_workload[4-5000-22-5-33-CephBlockPool] is failing with error smallfile-client-1-benchmark-bbfd9296-sg7bb Failed to run - (Failed) #10798

Open
pintojoy opened this issue Nov 4, 2024 · 0 comments
pintojoy commented Nov 4, 2024

self = <test_small_file_workload.TestSmallFileWorkload object at 0x7f243c4f0910>
file_size = 4, files = 5000, threads = 22, samples = 5, clients = 33
interface = 'CephBlockPool'

@pytest.mark.parametrize(
    argnames=["file_size", "files", "threads", "samples", "clients", "interface"],
    argvalues=[
        pytest.param([4, 5000, 22, 5, 33, constants.CEPHBLOCKPOOL]),
        pytest.param([16, 5000, 8, 5, 21, constants.CEPHBLOCKPOOL]),
        pytest.param([4, 2500, 4, 5, 9, constants.CEPHFILESYSTEM]),
        pytest.param([16, 1500, 4, 5, 9, constants.CEPHFILESYSTEM]),
    ],
)
@pytest.mark.polarion_id("OCS-1295")
def test_smallfile_workload(
    self, file_size, files, threads, samples, clients, interface
):
"""
Run SmallFile Workload

Args:
    file_size (int) : the size of the file to be used
    files (int) : number of files to use
    threads (int) : number of threads to be use in the test
    samples (int) : how meany samples to run for each test
    interface (str) : the volume type (rbd / cephfs)

"""
if config.PERF.get("deploy_internal_es"):
    self.es = ElasticSearch()
else:
    if config.PERF.get("internal_es_server") == "":
        self.es = None
        return
    else:
        url = (
            f"{config.PERF.get('internal_es_scheme')}://{config.PERF.get('internal_es_server')}"
            f":{config.PERF.get('internal_es_port')}",
        )
        self.es = {
            "server": config.PERF.get("internal_es_server"),
            "port": config.PERF.get("internal_es_port"),
            "scheme": config.PERF.get("internal_es_scheme"),
            "url": url,
        }
        # verify that the connection to the elasticsearch server is OK
        if not super(TestSmallFileWorkload, self).es_connect():
            self.es = None
            return

# deploy the benchmark-operator
self.deploy_benchmark_operator()

# verify that there is an elasticsearch server for the benchmark
if not self.es:
    log.error("This test must have an Elasticsearch server")
    return False

# Getting the full path for the test logs
self.full_log_path = get_full_test_logs_path(cname=self)
self.results_path = get_full_test_logs_path(cname=self)
self.full_log_path += (
    f"-{file_size}-{files}-{threads}-{samples}-{clients}-{interface}"
)
log.info(f"Logs file path name is : {self.full_log_path}")

# Loading the main template yaml file for the benchmark
log.info("Create resource file for small_files workload")
self.crd_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)

# Saving the Original elastic-search IP and PORT - if defined in yaml
self.es_info_backup(self.es)

self.set_storageclass(interface=interface)

# Setting the data set to 40% of the total storage capacity
self.setting_storage_usage(file_size, files, threads, samples, clients)

self.get_env_info()

if not self.run():

tests/cross_functional/performance/io_workload/test_small_file_workload.py:625:

tests/cross_functional/performance/io_workload/test_small_file_workload.py:518: in run
self.wait_for_wl_to_finish(sleep=30)

self = <test_small_file_workload.TestSmallFileWorkload object at 0x7f243c4f0910>
timeout = 18000, sleep = 30

def wait_for_wl_to_finish(self, timeout=18000, sleep=300):
"""
Waiting until the workload is finished and get the test log

Args:
    timeout (int): time in second to wait until the benchmark start
    sleep (int): Sleep interval seconds

Raise:
    exception for too much restarts of the test.
    ResourceWrongStatusException : test Failed / Error
    TimeoutExpiredError : test did not completed on time.

"""
log.info(f"Waiting for {self.client_pod_name} to complete")

Finished = 0
restarts = 0
total_time = timeout
while not Finished and total_time > 0:
    results = run_oc_command(
        "get pod --no-headers -o custom-columns=:metadata.name,:status.phase",
        namespace=benchmark_operator.BMO_NAME,
    )
    (fname, status) = ["", ""]
    for name in results:
        # looking for the pod which runs the benchmark (not the IO)
        # this pod contains `client` in its name, and there is only one
        # such pod; the other pods have `server` in their names.
        (fname, status) = name.split()
        if re.search("client", fname):
            break
        else:
            (fname, status) = ["", ""]

    if fname == "":  # there is no `client` pod !
        err_msg = f"{self.client_pod} Failed to run !!!"
        log.error(err_msg)
        raise Exception(err_msg)

    if not fname == self.client_pod:
        # The client pod name is different from the previous check, so it was restarted
        log.info(
            f"The pod {self.client_pod} was restarted. The new client pod is {fname}"
        )
        )
        self.client_pod = fname
        restarts += 1
        # in case of restarting the benchmark, reset the timeout as well
        total_time = timeout

    if restarts > 3:  # we are tolerating only 3 restarts
        err_msg = f"Too much restarts of the benchmark ({restarts})"
        log.error(err_msg)
        raise Exception(err_msg)

    if status == "Succeeded":
        # Getting the end time of the benchmark - for reporting.
        self.end_time = self.get_time()
        self.test_logs = self.pod_obj.exec_oc_cmd(
            f"logs {self.client_pod}", out_yaml_format=False
        )
        log.info(f"{self.client_pod} completed successfully")
        Finished = 1
    elif (
        status != constants.STATUS_RUNNING
        and status != constants.STATUS_PENDING
    ):
        # if the benchmark pod is not in Running state (and not Completed/Pending),
        # no need to wait for timeout.
        # Note: the pod can be in pending state in case of restart.
        err_msg = f"{self.client_pod} Failed to run - ({status})"
        log.error(err_msg)

        raise exceptions.ResourceWrongStatusException(
            self.client_pod,
            describe_out=err_msg,
            column="Status",
            expected="Succeeded",
            got=status,
        )

E ocs_ci.ocs.exceptions.ResourceWrongStatusException: Resource smallfile-client-1-benchmark-bbfd9296-sg7bb in column Status was in state Failed but expected Succeeded describe output: smallfile-client-1-benchmark-bbfd9296-sg7bb Failed to run - (Failed)
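The exception above only records that the client pod reached the Failed phase; the pod events and logs that explain why are not captured in this traceback. Below is a minimal triage sketch (not part of ocs-ci) for collecting them from the affected cluster before the pods are cleaned up. The namespace value is an assumption for where the benchmark-operator was deployed (benchmark_operator.BMO_NAME in the test), and the pod name is copied from the error message; adjust both as needed.

import subprocess

# Assumed values: NAMESPACE should match the namespace the benchmark-operator was
# deployed to; POD is the failed client pod reported in the exception above.
NAMESPACE = "benchmark-operator"
POD = "smallfile-client-1-benchmark-bbfd9296-sg7bb"


def collect(cmd):
    # Run an oc command and return whatever output it produced, so collection
    # keeps going even if the pod has already been garbage collected.
    result = subprocess.run(cmd, capture_output=True, text=True)
    return result.stdout or result.stderr


for cmd in (
    ["oc", "get", "pod", POD, "-n", NAMESPACE, "-o", "yaml"],
    ["oc", "describe", "pod", POD, "-n", NAMESPACE],
    ["oc", "logs", POD, "-n", NAMESPACE],
):
    print(f"===== {' '.join(cmd)} =====")
    print(collect(cmd))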

pintojoy self-assigned this Nov 4, 2024