From e0ced4f5bf3a9d98f552fb26a09c5d7fe9a5df54 Mon Sep 17 00:00:00 2001
From: Aryan
Date: Sat, 25 Jan 2025 04:20:49 +0530
Subject: [PATCH] Fix LTX frame rate for rope interpolation scale calculation
 (#244)

---
 finetrainers/models/ltx_video/lora.py | 5 ++++-
 finetrainers/trainer.py               | 4 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/finetrainers/models/ltx_video/lora.py b/finetrainers/models/ltx_video/lora.py
index 024f1fbb..bdd6ffa3 100644
--- a/finetrainers/models/ltx_video/lora.py
+++ b/finetrainers/models/ltx_video/lora.py
@@ -205,7 +205,10 @@ def forward_pass(
     **kwargs,
 ) -> torch.Tensor:
     # TODO(aryan): make configurable
-    rope_interpolation_scale = [1 / 25, 32, 32]
+    frame_rate = 25
+    latent_frame_rate = frame_rate / 8
+    spatial_compression_ratio = 32
+    rope_interpolation_scale = [1 / latent_frame_rate, spatial_compression_ratio, spatial_compression_ratio]
 
     denoised_latents = transformer(
         hidden_states=noisy_latents,
diff --git a/finetrainers/trainer.py b/finetrainers/trainer.py
index 0839db79..153e4069 100644
--- a/finetrainers/trainer.py
+++ b/finetrainers/trainer.py
@@ -549,10 +549,10 @@ def train(self) -> None:
         if self.vae_config is None:
             # If we've precomputed conditions and latents already, and are now re-using it, we will never load
             # the VAE so self.vae_config will not be set. So, we need to load it here.
-            vae_cls_name = resolve_vae_cls_from_ckpt_path(
+            vae_cls = resolve_vae_cls_from_ckpt_path(
                 self.args.pretrained_model_name_or_path, revision=self.args.revision, cache_dir=self.args.cache_dir
            )
-            vae_config = vae_cls_name.load_config(
+            vae_config = vae_cls.load_config(
                 self.args.pretrained_model_name_or_path, subfolder="vae", revision=self.args.revision,
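
Note: for reference, a minimal standalone sketch of what the corrected
calculation evaluates to versus the old value. The constants come from the
patch above; treating 8 as the LTX-Video VAE's temporal compression factor
(8 input frames per latent frame) is this patch's assumption, not something
stated elsewhere in the diff.

    # Sketch of the corrected rope_interpolation_scale computation.
    # Assumes the LTX-Video VAE compresses time by 8x and space by 32x,
    # per the constants hardcoded in the patch.
    frame_rate = 25                      # input video frames per second
    latent_frame_rate = frame_rate / 8   # 3.125 latent frames per second
    spatial_compression_ratio = 32

    rope_interpolation_scale = [
        1 / latent_frame_rate,           # 0.32 -- the old code used 1 / 25 = 0.04
        spatial_compression_ratio,
        spatial_compression_ratio,
    ]
    print(rope_interpolation_scale)      # [0.32, 32, 32]

In other words, the old code scaled the temporal RoPE axis by the pixel-space
frame rate even though the transformer operates on latent frames, so the
temporal scale was off by the VAE's temporal compression factor.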