-
Notifications
You must be signed in to change notification settings - Fork 102
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add 3DGS dataset example for Wan (#290)
- Loading branch information
Showing
3 changed files
with
244 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
#!/bin/bash

# Example launch script: fine-tunes Wan on the 3DGS-dissolve dataset.
# This section configures the process environment, selects the distributed
# backend and the GPUs, and chooses the parallelism layout. The argument
# arrays and the actual launch command follow further down in the script.

# -e: stop at the first failing command; -x: trace each command as it runs.
set -e -x

# export TORCH_LOGS="+dynamo,recompiles,graph_breaks"
# export TORCHDYNAMO_VERBOSE=1
export WANDB_MODE="offline"
export NCCL_P2P_DISABLE=1
export TORCH_NCCL_ENABLE_MONITORING=0
export FINETRAINERS_LOG_LEVEL="DEBUG"

# Finetrainers supports multiple backends for distributed training. Select your favourite and benchmark the differences!
# BACKEND="accelerate"
BACKEND="ptd"

# In this setting, I'm using 2 GPUs on a 4-GPU node for training.
# NUM_GPUS is the number of processes to launch; CUDA_VISIBLE_DEVICES lists the
# device ids they may use. Keep the two consistent when changing either one.
NUM_GPUS=2
CUDA_VISIBLE_DEVICES="2,3"

# Check the JSON files for the expected JSON format
TRAINING_DATASET_CONFIG="examples/training/sft/wan/3dgs_dissolve/training.json"
VALIDATION_DATASET_FILE="examples/training/sft/wan/3dgs_dissolve/validation.json"

# Depending on how many GPUs you have available, choose your degree of parallelism and technique!
# Every preset is an array with one flag or value per element, so it can be
# expanded as "${PRESET[@]}" without depending on word splitting or globbing.
# Presets that are not selected below are kept as ready-made alternatives.
# shellcheck disable=SC2034
DDP_1=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 1 --dp_shards 1 --cp_degree 1 --tp_degree 1)
DDP_2=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 2 --dp_shards 1 --cp_degree 1 --tp_degree 1)
# shellcheck disable=SC2034
DDP_4=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 4 --dp_shards 1 --cp_degree 1 --tp_degree 1)
# shellcheck disable=SC2034
FSDP_2=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 1 --dp_shards 2 --cp_degree 1 --tp_degree 1)
# shellcheck disable=SC2034
FSDP_4=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 1 --dp_shards 4 --cp_degree 1 --tp_degree 1)
# shellcheck disable=SC2034
HSDP_2_2=(--parallel_backend "$BACKEND" --pp_degree 1 --dp_degree 2 --dp_shards 2 --cp_degree 1 --tp_degree 1)

# Parallel arguments
# Swap DDP_2 for any other preset above to change the layout.
parallel_cmd=(
  "${DDP_2[@]}"
)
|
||
# Each *_cmd array below holds one group of train.py command-line arguments,
# one flag or value per element. The launch section expands them with
# "${name[@]}", so every element reaches train.py as exactly one argument.

# Model arguments
model_cmd=(
  --model_name "wan"
  --pretrained_model_name_or_path "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
)

# Dataset arguments
# Here, we know that the dataset size is about ~100 videos. Since we're using 2 GPUs, we precompute
# embeddings of 50 dataset items per GPU. Also, we're using a very small dataset for finetuning, so
# we are okay with precomputing embeddings once and re-using them without having to worry about disk
# space. Currently, however, every new training run performs precomputation even if it's not required
# (which is something we've to improve [TODO(aryan)])
# NOTE(review): the comment above describes 50 items per GPU, but the flag below passes 100.
# Confirm whether --precomputation_items is counted per process or in total, then align the two.
dataset_cmd=(
  # Quoted so the path stays a single argument even if it is empty or contains spaces.
  --dataset_config "$TRAINING_DATASET_CONFIG"
  --dataset_shuffle_buffer_size 10
  --precomputation_items 100
  --precomputation_once
)

# Dataloader arguments
dataloader_cmd=(
  --dataloader_num_workers 0
)

# Diffusion arguments
diffusion_cmd=(
  --flow_weighting_scheme "logit_normal"
)

# Training arguments
# We target just the attention projection layers for LoRA training here.
# You can modify as you please and target any layer (regex is supported)
training_cmd=(
  --training_type "lora"
  --seed 42
  --batch_size 1
  --train_steps 5000
  --rank 32
  --lora_alpha 32
  # Kept in double quotes: the pattern contains '*', '|' and parentheses that
  # the shell would otherwise interpret.
  --target_modules "blocks.*(to_q|to_k|to_v|to_out.0)"
  --gradient_accumulation_steps 1
  --gradient_checkpointing
  --checkpointing_steps 500
  --checkpointing_limit 2
  # --resume_from_checkpoint 3000
  --enable_slicing
  --enable_tiling
)

# Optimizer arguments
optimizer_cmd=(
  --optimizer "adamw"
  --lr 5e-5
  --lr_scheduler "constant_with_warmup"
  --lr_warmup_steps 1000
  --lr_num_cycles 1
  --beta1 0.9
  --beta2 0.99
  --weight_decay 1e-4
  --epsilon 1e-8
  --max_grad_norm 1.0
)

# Validation arguments
validation_cmd=(
  --validation_dataset_file "$VALIDATION_DATASET_FILE"
  --validation_steps 500
)

# Miscellaneous arguments
# NOTE(review): --output_dir is an absolute, machine-specific path; change it
# to a writable location on your system before running.
miscellaneous_cmd=(
  --tracker_name "finetrainers-wan"
  --output_dir "/raid/aryan/wan"
  --init_timeout 600
  --nccl_timeout 600
  --report_to "wandb"
)
|
||
# Execute the training script
#
# Dispatches on $BACKEND and launches train.py with every argument group
# defined earlier in this script:
#   accelerate - `accelerate launch`, using the accelerate config that matches
#                $NUM_GPUS and the device ids in $CUDA_VISIBLE_DEVICES.
#   ptd        - `torchrun` on a single node with $NUM_GPUS processes.
# Any other backend value, or a GPU count without a matching accelerate config,
# stops the script with an error on stderr and a non-zero exit status, so the
# "Finished" banner is only printed after a launcher actually ran.
case "$BACKEND" in
  accelerate)
    # Config files exist for 1, 2, 4 and 8 processes only.
    case "$NUM_GPUS" in
      1 | 2 | 4 | 8)
        ACCELERATE_CONFIG_FILE="accelerate_configs/uncompiled_${NUM_GPUS}.yaml"
        ;;
      *)
        printf 'error: no accelerate config for NUM_GPUS=%s (supported: 1, 2, 4, 8)\n' "$NUM_GPUS" >&2
        exit 1
        ;;
    esac

    accelerate launch --config_file "$ACCELERATE_CONFIG_FILE" --gpu_ids "$CUDA_VISIBLE_DEVICES" train.py \
      "${parallel_cmd[@]}" \
      "${model_cmd[@]}" \
      "${dataset_cmd[@]}" \
      "${dataloader_cmd[@]}" \
      "${diffusion_cmd[@]}" \
      "${training_cmd[@]}" \
      "${optimizer_cmd[@]}" \
      "${validation_cmd[@]}" \
      "${miscellaneous_cmd[@]}"
    ;;

  ptd)
    # torchrun takes no device-id option here, so the GPU selection is handed
    # to the child processes through the environment.
    export CUDA_VISIBLE_DEVICES

    torchrun \
      --standalone \
      --nnodes=1 \
      --nproc_per_node="$NUM_GPUS" \
      --rdzv_backend c10d \
      --rdzv_endpoint="localhost:0" \
      train.py \
      "${parallel_cmd[@]}" \
      "${model_cmd[@]}" \
      "${dataset_cmd[@]}" \
      "${dataloader_cmd[@]}" \
      "${diffusion_cmd[@]}" \
      "${training_cmd[@]}" \
      "${optimizer_cmd[@]}" \
      "${validation_cmd[@]}" \
      "${miscellaneous_cmd[@]}"
    ;;

  *)
    printf 'error: unsupported BACKEND "%s" (expected "accelerate" or "ptd")\n' "$BACKEND" >&2
    exit 1
    ;;
esac

# printf instead of `echo -ne`: identical output, without relying on echo's
# non-portable option and escape handling.
printf '%s\n\n' "-------------------- Finished executing script --------------------"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
{ | ||
"datasets": [ | ||
{ | ||
"data_root": "finetrainers/3dgs-dissolve", | ||
"dataset_type": "video", | ||
"id_token": "3DGS_DISSOLVE", | ||
"video_resolution_buckets": [ | ||
[49, 480, 832] | ||
], | ||
"reshape_mode": "bicubic", | ||
"remove_common_llm_caption_prefixes": true | ||
}, | ||
{ | ||
"data_root": "finetrainers/3dgs-dissolve", | ||
"dataset_type": "video", | ||
"id_token": "3DGS_DISSOLVE", | ||
"video_resolution_buckets": [ | ||
[81, 480, 832] | ||
], | ||
"reshape_mode": "bicubic", | ||
"remove_common_llm_caption_prefixes": true | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
{ | ||
"data": [ | ||
{ | ||
"caption": "A spacecraft, rendered in a 3D appearance, ascends into the night sky, leaving behind a trail of fiery exhaust. As it climbs higher, the exhaust gradually transforms into a burst of red sparks, creating a dramatic and dynamic visual effect against the dark backdrop.", | ||
"image_path": null, | ||
"video_path": null, | ||
"num_inference_steps": 50, | ||
"height": 480, | ||
"width": 832, | ||
"num_frames": 49 | ||
}, | ||
{ | ||
"caption": "3DGS_DISSOLVE A spacecraft, rendered in a 3D appearance, ascends into the night sky, leaving behind a trail of fiery exhaust. As it climbs higher, the exhaust gradually transforms into a burst of red sparks, creating a dramatic and dynamic visual effect against the dark backdrop.", | ||
"image_path": null, | ||
"video_path": null, | ||
"num_inference_steps": 50, | ||
"height": 480, | ||
"width": 832, | ||
"num_frames": 49 | ||
}, | ||
{ | ||
"caption": "3DGS_DISSOLVE A spacecraft, rendered in a 3D appearance, ascends into the night sky, leaving behind a trail of fiery exhaust. As it climbs higher, the exhaust gradually transforms into a burst of red sparks, creating a dramatic and dynamic visual effect against the dark backdrop.", | ||
"image_path": null, | ||
"video_path": null, | ||
"num_inference_steps": 50, | ||
"height": 480, | ||
"width": 832, | ||
"num_frames": 81 | ||
}, | ||
{ | ||
"caption": "3DGS_DISSOLVE A vintage-style treasure chest, rendered in a 3D appearance, stands prominently against a dark background. As the scene progresses, the chest begins to emit a glowing light, which intensifies until it evaporates into a burst of red sparks, creating a dramatic and mysterious atmosphere.", | ||
"image_path": null, | ||
"video_path": null, | ||
"num_inference_steps": 50, | ||
"height": 480, | ||
"width": 832, | ||
"num_frames": 49 | ||
}, | ||
{ | ||
"caption": "3DGS_DISSOLVE A glowing, fiery cube in a 3D appearance begins to spin and rotate, its edges shimmering with intense light. As it continues to spin, the cube gradually evaporates into a burst of red sparks that scatter across the screen, creating a dynamic and mesmerizing visual effect against the dark background.", | ||
"image_path": null, | ||
"video_path": null, | ||
"num_inference_steps": 50, | ||
"height": 480, | ||
"width": 832, | ||
"num_frames": 49 | ||
}, | ||
{ | ||
"caption": "3DGS_DISSOLVE A dynamic explosion unfolds in a 3D appearance, beginning as a concentrated burst of intense orange flames. As the fire intensifies, it rapidly expands outward, transitioning into a vibrant display of red sparks that scatter across the frame. The sparks continue to evolve, evaporating into a burst of red sparks against the dark backdrop, creating a mesmerizing visual spectacle.", | ||
"image_path": null, | ||
"video_path": null, | ||
"num_inference_steps": 50, | ||
"height": 480, | ||
"width": 832, | ||
"num_frames": 49 | ||
} | ||
] | ||
} |