Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 3DGS dataset example for Wan #290

Merged
merged 1 commit into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 162 additions & 0 deletions examples/training/sft/wan/3dgs_dissolve/train.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/bin/bash
#
# Example launcher: LoRA fine-tuning of Wan2.1-T2V-1.3B on the 3DGS dissolve dataset.
# This first part only sets environment variables and builds the argument arrays;
# the launch itself happens at the bottom of the script.

# -e: abort on the first failing command; -x: trace every command as it runs.
set -e -x

# export TORCH_LOGS="+dynamo,recompiles,graph_breaks"
# export TORCHDYNAMO_VERBOSE=1
export WANDB_MODE="offline"
export NCCL_P2P_DISABLE=1
export TORCH_NCCL_ENABLE_MONITORING=0
export FINETRAINERS_LOG_LEVEL="DEBUG"

# Finetrainers supports multiple backends for distributed training. Select your favourite and benchmark the differences!
# BACKEND="accelerate"
BACKEND="ptd"

# In this setting, I'm using 2 GPUs on a 4-GPU node for training
NUM_GPUS=2
CUDA_VISIBLE_DEVICES="2,3"

# Check the JSON files for the expected JSON format
TRAINING_DATASET_CONFIG="examples/training/sft/wan/3dgs_dissolve/training.json"
VALIDATION_DATASET_FILE="examples/training/sft/wan/3dgs_dissolve/validation.json"

# Depending on how many GPUs you have available, choose your degree of parallelism and technique!
# Each preset is a whitespace-separated flag string; exactly one of them is selected below.
DDP_1="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 1 --cp_degree 1 --tp_degree 1"
DDP_2="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 2 --dp_shards 1 --cp_degree 1 --tp_degree 1"
DDP_4="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 4 --dp_shards 1 --cp_degree 1 --tp_degree 1"
FSDP_2="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 2 --cp_degree 1 --tp_degree 1"
FSDP_4="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 1 --dp_shards 4 --cp_degree 1 --tp_degree 1"
HSDP_2_2="--parallel_backend $BACKEND --pp_degree 1 --dp_degree 2 --dp_shards 2 --cp_degree 1 --tp_degree 1"

# Parallel arguments
# Split the chosen preset into one array element per word. `read -a` splits on
# whitespace without performing pathname expansion, unlike a bare unquoted $DDP_2.
# To use another preset, change the variable name on the next line.
read -r -a parallel_cmd <<<"$DDP_2"

# Model arguments
model_cmd=(
  --model_name "wan"
  --pretrained_model_name_or_path "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
)

# Dataset arguments
# Here, we know that the dataset size is about ~100 videos. Since we're using 2 GPUs, we precompute
# embeddings of 50 dataset items per GPU. Also, we're using a very small dataset for finetuning, so
# we are okay with precomputing embeddings once and re-using them without having to worry about disk
# space. Currently, however, every new training run performs precomputation even if it's not required
# (which is something we've to improve [TODO(aryan)])
# NOTE(review): the flag below passes 100 while the text above mentions 50 per GPU -- confirm
# whether --precomputation_items is counted per rank or across all ranks.
dataset_cmd=(
  --dataset_config "$TRAINING_DATASET_CONFIG"
  --dataset_shuffle_buffer_size 10
  --precomputation_items 100
  --precomputation_once
)

# Dataloader arguments
dataloader_cmd=(
  --dataloader_num_workers 0
)

# Diffusion arguments
diffusion_cmd=(
  --flow_weighting_scheme "logit_normal"
)

# Training arguments
# We target just the attention projections layers for LoRA training here.
# You can modify as you please and target any layer (regex is supported)
training_cmd=(
  --training_type "lora"
  --seed 42
  --batch_size 1
  --train_steps 5000
  --rank 32
  --lora_alpha 32
  --target_modules "blocks.*(to_q|to_k|to_v|to_out.0)"
  --gradient_accumulation_steps 1
  --gradient_checkpointing
  --checkpointing_steps 500
  --checkpointing_limit 2
  # --resume_from_checkpoint 3000
  --enable_slicing
  --enable_tiling
)

# Optimizer arguments
optimizer_cmd=(
  --optimizer "adamw"
  --lr 5e-5
  --lr_scheduler "constant_with_warmup"
  --lr_warmup_steps 1000
  --lr_num_cycles 1
  --beta1 0.9
  --beta2 0.99
  --weight_decay 1e-4
  --epsilon 1e-8
  --max_grad_norm 1.0
)

# Validation arguments
validation_cmd=(
  --validation_dataset_file "$VALIDATION_DATASET_FILE"
  --validation_steps 500
)

# Miscellaneous arguments
# NOTE(review): --output_dir is an absolute, machine-specific path; adjust it for your setup.
miscellaneous_cmd=(
  --tracker_name "finetrainers-wan"
  --output_dir "/raid/aryan/wan"
  --init_timeout 600
  --nccl_timeout 600
  --report_to "wandb"
)

# Execute the training script
#
# Reads BACKEND, NUM_GPUS, CUDA_VISIBLE_DEVICES and the *_cmd argument arrays
# defined earlier in this script, then launches train.py with the matching
# launcher. An unsupported BACKEND or NUM_GPUS now aborts with a message on
# stderr and exit status 1 instead of silently launching nothing (or launching
# accelerate with an empty --config_file).

# Both launchers receive the same train.py arguments, in this order.
train_args=(
  "${parallel_cmd[@]}"
  "${model_cmd[@]}"
  "${dataset_cmd[@]}"
  "${dataloader_cmd[@]}"
  "${diffusion_cmd[@]}"
  "${training_cmd[@]}"
  "${optimizer_cmd[@]}"
  "${validation_cmd[@]}"
  "${miscellaneous_cmd[@]}"
)

case "$BACKEND" in
  accelerate)
    # Config files are only referenced for these process counts.
    case "$NUM_GPUS" in
      1 | 2 | 4 | 8)
        ACCELERATE_CONFIG_FILE="accelerate_configs/uncompiled_${NUM_GPUS}.yaml"
        ;;
      *)
        printf 'error: no accelerate config for NUM_GPUS=%s (expected 1, 2, 4 or 8)\n' "$NUM_GPUS" >&2
        exit 1
        ;;
    esac

    accelerate launch \
      --config_file "$ACCELERATE_CONFIG_FILE" \
      --gpu_ids "$CUDA_VISIBLE_DEVICES" \
      train.py \
      "${train_args[@]}"
    ;;

  ptd)
    # The variable was assigned without `export` above; export it so the
    # torchrun child processes inherit the GPU selection.
    export CUDA_VISIBLE_DEVICES

    # NOTE(review): torchrun documents --standalone as configuring its own local
    # rendezvous; confirm whether the explicit --rdzv_* flags are still needed.
    torchrun \
      --standalone \
      --nnodes=1 \
      --nproc_per_node="$NUM_GPUS" \
      --rdzv_backend c10d \
      --rdzv_endpoint="localhost:0" \
      train.py \
      "${train_args[@]}"
    ;;

  *)
    printf 'error: unsupported BACKEND=%s (expected "accelerate" or "ptd")\n' "$BACKEND" >&2
    exit 1
    ;;
esac

# Passing the banner as a %s argument keeps its leading dashes from being parsed as printf options.
printf '%s\n\n' "-------------------- Finished executing script --------------------"
24 changes: 24 additions & 0 deletions examples/training/sft/wan/3dgs_dissolve/training.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"datasets": [
{
"data_root": "finetrainers/3dgs-dissolve",
"dataset_type": "video",
"id_token": "3DGS_DISSOLVE",
"video_resolution_buckets": [
[49, 480, 832]
],
"reshape_mode": "bicubic",
"remove_common_llm_caption_prefixes": true
},
{
"data_root": "finetrainers/3dgs-dissolve",
"dataset_type": "video",
"id_token": "3DGS_DISSOLVE",
"video_resolution_buckets": [
[81, 480, 832]
],
"reshape_mode": "bicubic",
"remove_common_llm_caption_prefixes": true
}
]
}
58 changes: 58 additions & 0 deletions examples/training/sft/wan/3dgs_dissolve/validation.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"data": [
{
"caption": "A spacecraft, rendered in a 3D appearance, ascends into the night sky, leaving behind a trail of fiery exhaust. As it climbs higher, the exhaust gradually transforms into a burst of red sparks, creating a dramatic and dynamic visual effect against the dark backdrop.",
"image_path": null,
"video_path": null,
"num_inference_steps": 50,
"height": 480,
"width": 832,
"num_frames": 49
},
{
"caption": "3DGS_DISSOLVE A spacecraft, rendered in a 3D appearance, ascends into the night sky, leaving behind a trail of fiery exhaust. As it climbs higher, the exhaust gradually transforms into a burst of red sparks, creating a dramatic and dynamic visual effect against the dark backdrop.",
"image_path": null,
"video_path": null,
"num_inference_steps": 50,
"height": 480,
"width": 832,
"num_frames": 49
},
{
"caption": "3DGS_DISSOLVE A spacecraft, rendered in a 3D appearance, ascends into the night sky, leaving behind a trail of fiery exhaust. As it climbs higher, the exhaust gradually transforms into a burst of red sparks, creating a dramatic and dynamic visual effect against the dark backdrop.",
"image_path": null,
"video_path": null,
"num_inference_steps": 50,
"height": 480,
"width": 832,
"num_frames": 81
},
{
"caption": "3DGS_DISSOLVE A vintage-style treasure chest, rendered in a 3D appearance, stands prominently against a dark background. As the scene progresses, the chest begins to emit a glowing light, which intensifies until it evaporates into a burst of red sparks, creating a dramatic and mysterious atmosphere.",
"image_path": null,
"video_path": null,
"num_inference_steps": 50,
"height": 480,
"width": 832,
"num_frames": 49
},
{
"caption": "3DGS_DISSOLVE A glowing, fiery cube in a 3D appearance begins to spin and rotate, its edges shimmering with intense light. As it continues to spin, the cube gradually evaporates into a burst of red sparks that scatter across the screen, creating a dynamic and mesmerizing visual effect against the dark background.",
"image_path": null,
"video_path": null,
"num_inference_steps": 50,
"height": 480,
"width": 832,
"num_frames": 49
},
{
"caption": "3DGS_DISSOLVE A dynamic explosion unfolds in a 3D appearance, beginning as a concentrated burst of intense orange flames. As the fire intensifies, it rapidly expands outward, transitioning into a vibrant display of red sparks that scatter across the frame. The sparks continue to evolve, evaporating into a burst of red sparks against the dark backdrop, creating a mesmerizing visual spectacle.",
"image_path": null,
"video_path": null,
"num_inference_steps": 50,
"height": 480,
"width": 832,
"num_frames": 49
}
]
}