You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
root@autodl-container-85184596f7-f5dd3621:~/autodl-tmp/FastVideo# bash scripts/finetune/finetune_hunyuan_hf_lora.sh
--> loading model from data/hunyuan_diffusers
[rank0]: Traceback (most recent call last):
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 432, in load_config
[rank0]: config_dict = cls._dict_from_json_file(config_file)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 558, in _dict_from_json_file
[rank0]: return json.loads(text)
[rank0]: ^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/json/__init__.py", line 346, in loads
[rank0]: return _default_decoder.decode(s)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/json/decoder.py", line 337, in decode
[rank0]: obj, end = self.raw_decode(s, idx=_w(s, 0).end())
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/json/decoder.py", line 353, in raw_decode
[rank0]: obj, end = self.scan_once(s, idx)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^
[rank0]: json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 5 column 1 (char 38)
[rank0]: During handling of the above exception, another exception occurred:
[rank0]: Traceback (most recent call last):
[rank0]: File "/root/autodl-tmp/FastVideo/fastvideo/train.py", line 759, in <module>
[rank0]: main(args)
[rank0]: File "/root/autodl-tmp/FastVideo/fastvideo/train.py", line 214, in main
[rank0]: transformer = load_transformer(
[rank0]: ^^^^^^^^^^^^^^^^^
[rank0]: File "/root/autodl-tmp/FastVideo/fastvideo/utils/load.py", line 278, in load_transformer
[rank0]: transformer = HunyuanVideoTransformer3DModel.from_pretrained(
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
[rank0]: return fn(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/models/modeling_utils.py", line 687, in from_pretrained
[rank0]: config, unused_kwargs, commit_hash = cls.load_config(
[rank0]: ^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
[rank0]: return fn(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 436, in load_config
[rank0]: raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.")
[rank0]: OSError: It looks like the config file at 'data/hunyuan_diffusers/config.json' is not a valid JSON file.
[rank0]:[W224 11:10:38.669618087 ProcessGroupNCCL.cpp:1250] Warning: WARNING: process group has NOT been destroyed before we destruct ProcessGroupNCCL. On normal program exit, the application should call destroy_process_group to ensure that any pending NCCL operations have finished in this process. In rare cases this process can exit before this point and block the progress of another member of the process group. This constraint has always been present, but this warning has only been added since PyTorch 2.4 (function operator())
[rank1]: Traceback (most recent call last):
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 432, in load_config
[rank1]: config_dict = cls._dict_from_json_file(config_file)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 558, in _dict_from_json_file
[rank1]: return json.loads(text)
[rank1]: ^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/json/__init__.py", line 346, in loads
[rank1]: return _default_decoder.decode(s)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/json/decoder.py", line 337, in decode
[rank1]: obj, end = self.raw_decode(s, idx=_w(s, 0).end())
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/json/decoder.py", line 353, in raw_decode
[rank1]: obj, end = self.scan_once(s, idx)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^
[rank1]: json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 5 column 1 (char 38)
[rank1]: During handling of the above exception, another exception occurred:
[rank1]: Traceback (most recent call last):
[rank1]: File "/root/autodl-tmp/FastVideo/fastvideo/train.py", line 759, in <module>
[rank1]: main(args)
[rank1]: File "/root/autodl-tmp/FastVideo/fastvideo/train.py", line 214, in main
[rank1]: transformer = load_transformer(
[rank1]: ^^^^^^^^^^^^^^^^^
[rank1]: File "/root/autodl-tmp/FastVideo/fastvideo/utils/load.py", line 278, in load_transformer
[rank1]: transformer = HunyuanVideoTransformer3DModel.from_pretrained(
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
[rank1]: return fn(*args, **kwargs)
[rank1]: ^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/models/modeling_utils.py", line 687, in from_pretrained
[rank1]: config, unused_kwargs, commit_hash = cls.load_config(
[rank1]: ^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
[rank1]: return fn(*args, **kwargs)
[rank1]: ^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 436, in load_config
[rank1]: raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.")
[rank1]: OSError: It looks like the config file at 'data/hunyuan_diffusers/config.json' is not a valid JSON file.
W0224 11:10:39.088000 1529 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 1582 closing signal SIGTERM
E0224 11:10:39.304000 1529 site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: 1) local_rank: 0 (pid: 1581) of binary: /root/miniconda3/bin/python
Traceback (most recent call last):
File "/root/miniconda3/bin/torchrun", line 8, in <module>
sys.exit(main())
^^^^^^
File "/root/miniconda3/lib/python3.12/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
return f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "/root/miniconda3/lib/python3.12/site-packages/torch/distributed/run.py", line 919, in main
run(args)
File "/root/miniconda3/lib/python3.12/site-packages/torch/distributed/run.py", line 910, in run
elastic_launch(
File "/root/miniconda3/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 138, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniconda3/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
When I fine-tune the Hunyuan model, it keeps saying that data/hunyuan_diffusers/config.json is missing. And when I add a data/hunyuan_diffusers/config.json, it says "It looks like the config file at 'data/hunyuan_diffusers/config.json' is not a valid JSON file."
The text was updated successfully, but these errors were encountered:
Could you provide the script? I tried the latest version of the fine-tuning code and it works. Also, could you check whether the path to hunyuan_diffusers is correct? From my understanding, there is no config.json file in hunyuan_diffusers, and the error occurs because it didn't find data/hunyuan_diffusers and tried to download the model from Hugging Face.
Environment
GPU: NVIDIA vGPU-32GB, 3
Describe the bug
root@autodl-container-85184596f7-f5dd3621:~/autodl-tmp/FastVideo# bash scripts/finetune/finetune_hunyuan_hf_lora.sh
--> loading model from data/hunyuan_diffusers
[rank0]: Traceback (most recent call last):
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 432, in load_config
[rank0]: config_dict = cls._dict_from_json_file(config_file)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 558, in _dict_from_json_file
[rank0]: return json.loads(text)
[rank0]: ^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/json/__init__.py", line 346, in loads
[rank0]: return _default_decoder.decode(s)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/json/decoder.py", line 337, in decode
[rank0]: obj, end = self.raw_decode(s, idx=_w(s, 0).end())
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/json/decoder.py", line 353, in raw_decode
[rank0]: obj, end = self.scan_once(s, idx)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^
[rank0]: json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 5 column 1 (char 38)
[rank0]: During handling of the above exception, another exception occurred:
[rank0]: Traceback (most recent call last):
[rank0]: File "/root/autodl-tmp/FastVideo/fastvideo/train.py", line 759, in <module>
[rank0]: main(args)
[rank0]: File "/root/autodl-tmp/FastVideo/fastvideo/train.py", line 214, in main
[rank0]: transformer = load_transformer(
[rank0]: ^^^^^^^^^^^^^^^^^
[rank0]: File "/root/autodl-tmp/FastVideo/fastvideo/utils/load.py", line 278, in load_transformer
[rank0]: transformer = HunyuanVideoTransformer3DModel.from_pretrained(
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
[rank0]: return fn(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/models/modeling_utils.py", line 687, in from_pretrained
[rank0]: config, unused_kwargs, commit_hash = cls.load_config(
[rank0]: ^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
[rank0]: return fn(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 436, in load_config
[rank0]: raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.")
[rank0]: OSError: It looks like the config file at 'data/hunyuan_diffusers/config.json' is not a valid JSON file.
[rank0]:[W224 11:10:38.669618087 ProcessGroupNCCL.cpp:1250] Warning: WARNING: process group has NOT been destroyed before we destruct ProcessGroupNCCL. On normal program exit, the application should call destroy_process_group to ensure that any pending NCCL operations have finished in this process. In rare cases this process can exit before this point and block the progress of another member of the process group. This constraint has always been present, but this warning has only been added since PyTorch 2.4 (function operator())
[rank1]: Traceback (most recent call last):
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 432, in load_config
[rank1]: config_dict = cls._dict_from_json_file(config_file)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 558, in _dict_from_json_file
[rank1]: return json.loads(text)
[rank1]: ^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/json/__init__.py", line 346, in loads
[rank1]: return _default_decoder.decode(s)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/json/decoder.py", line 337, in decode
[rank1]: obj, end = self.raw_decode(s, idx=_w(s, 0).end())
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/json/decoder.py", line 353, in raw_decode
[rank1]: obj, end = self.scan_once(s, idx)
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^
[rank1]: json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 5 column 1 (char 38)
[rank1]: During handling of the above exception, another exception occurred:
[rank1]: Traceback (most recent call last):
[rank1]: File "/root/autodl-tmp/FastVideo/fastvideo/train.py", line 759, in <module>
[rank1]: main(args)
[rank1]: File "/root/autodl-tmp/FastVideo/fastvideo/train.py", line 214, in main
[rank1]: transformer = load_transformer(
[rank1]: ^^^^^^^^^^^^^^^^^
[rank1]: File "/root/autodl-tmp/FastVideo/fastvideo/utils/load.py", line 278, in load_transformer
[rank1]: transformer = HunyuanVideoTransformer3DModel.from_pretrained(
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
[rank1]: return fn(*args, **kwargs)
[rank1]: ^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/models/modeling_utils.py", line 687, in from_pretrained
[rank1]: config, unused_kwargs, commit_hash = cls.load_config(
[rank1]: ^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
[rank1]: return fn(*args, **kwargs)
[rank1]: ^^^^^^^^^^^^^^^^^^^
[rank1]: File "/root/miniconda3/lib/python3.12/site-packages/diffusers/configuration_utils.py", line 436, in load_config
[rank1]: raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.")
[rank1]: OSError: It looks like the config file at 'data/hunyuan_diffusers/config.json' is not a valid JSON file.
W0224 11:10:39.088000 1529 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 1582 closing signal SIGTERM
E0224 11:10:39.304000 1529 site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: 1) local_rank: 0 (pid: 1581) of binary: /root/miniconda3/bin/python
Traceback (most recent call last):
File "/root/miniconda3/bin/torchrun", line 8, in <module>
sys.exit(main())
^^^^^^
File "/root/miniconda3/lib/python3.12/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
return f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "/root/miniconda3/lib/python3.12/site-packages/torch/distributed/run.py", line 919, in main
run(args)
File "/root/miniconda3/lib/python3.12/site-packages/torch/distributed/run.py", line 910, in run
elastic_launch(
File "/root/miniconda3/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 138, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniconda3/lib/python3.12/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
fastvideo/train.py FAILED
Failures:
<NO_OTHER_FAILURES>
Root Cause (first observed failure):
[0]:
time : 2025-02-24_11:10:39
host : autodl-container-85184596f7-f5dd3621
rank : 0 (local_rank: 0)
exitcode : 1 (pid: 1581)
error_file: <N/A>
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
root@autodl-container-85184596f7-f5dd3621:~/autodl-tmp/FastVideo#
Reproduction
When I fine-tune the Hunyuan model, it keeps saying that data/hunyuan_diffusers/config.json is missing. And when I add a data/hunyuan_diffusers/config.json, it says "It looks like the config file at 'data/hunyuan_diffusers/config.json' is not a valid JSON file."
The text was updated successfully, but these errors were encountered: