Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix PaliGemmaVitEncoder output shape #2140

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions keras_hub/api/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@
from keras_hub.src.models.segformer.segformer_image_converter import (
SegFormerImageConverter,
)
from keras_hub.src.models.siglip.siglip_image_converter import (
SigLIPImageConverter,
)
from keras_hub.src.models.vgg.vgg_image_converter import VGGImageConverter
from keras_hub.src.models.vit.vit_image_converter import ViTImageConverter
from keras_hub.src.models.whisper.whisper_audio_converter import (
Expand Down
7 changes: 7 additions & 0 deletions keras_hub/api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,13 @@
)
from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM
from keras_hub.src.models.seq_2_seq_lm_preprocessor import Seq2SeqLMPreprocessor
from keras_hub.src.models.siglip.siglip_backbone import SigLIPBackbone
from keras_hub.src.models.siglip.siglip_preprocessor import SigLIPPreprocessor
from keras_hub.src.models.siglip.siglip_text_encoder import SigLIPTextEncoder
from keras_hub.src.models.siglip.siglip_tokenizer import SigLIPTokenizer
from keras_hub.src.models.siglip.siglip_vision_encoder import (
SigLIPVisionEncoder,
)
from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone import (
StableDiffusion3Backbone,
)
Expand Down
1 change: 1 addition & 0 deletions keras_hub/api/tokenizers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
)
from keras_hub.src.models.phi3.phi3_tokenizer import Phi3Tokenizer
from keras_hub.src.models.roberta.roberta_tokenizer import RobertaTokenizer
from keras_hub.src.models.siglip.siglip_tokenizer import SigLIPTokenizer
from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer
from keras_hub.src.models.whisper.whisper_tokenizer import WhisperTokenizer
from keras_hub.src.models.xlm_roberta.xlm_roberta_tokenizer import (
Expand Down
98 changes: 97 additions & 1 deletion keras_hub/src/layers/preprocessing/image_converter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import math

import keras
import ml_dtypes
import numpy as np
from keras import ops

Expand All @@ -18,6 +19,95 @@
from keras_hub.src.utils.tensor_utils import preprocessing_function


# TODO: Use `keras.layers.Resizing` once `antialias` is configurable.
# https://github.com/keras-team/keras/pull/20972
def _saturate_cast(x, dtype, backend_module):
    """Cast `x` to `dtype`, clamping values instead of letting them wrap.

    The tensor is first clipped to the range shared by its own dtype and the
    target dtype, then cast with `backend_module.cast`.

    Args:
        x: Tensor-like object exposing a `dtype` attribute.
        dtype: Target dtype; normalized with
            `keras.backend.standardize_dtype`.
        backend_module: Backend namespace providing `numpy.clip` and `cast`.

    Returns:
        The result of `backend_module.cast` applied to the clipped tensor.
    """

    def _value_range(name):
        # `bool` has no iinfo/finfo entry, so its range is spelled out.
        if name == "bool":
            return 0, 1
        if "int" in name:
            info = ml_dtypes.iinfo(name)
        else:
            info = ml_dtypes.finfo(name)
        return info.min, info.max

    target = keras.backend.standardize_dtype(dtype)
    source = keras.backend.standardize_dtype(x.dtype)
    src_low, src_high = _value_range(source)
    dst_low, dst_high = _value_range(target)

    # The clip bounds are expressed in the *input* dtype. If converting a
    # bound to that dtype moved it outside the output range, step it one
    # representable value toward zero so it is back inside.
    lower = np.maximum(src_low, dst_low).astype(source)
    if lower < dst_low:
        lower = np.nextafter(lower, 0, dtype=source)
    upper = np.minimum(src_high, dst_high).astype(source)
    if upper > dst_high:
        upper = np.nextafter(upper, 0, dtype=source)

    clipped = backend_module.numpy.clip(x, lower, upper)
    return backend_module.cast(clipped, target)


class ResizingAntialiasConfigurable(keras.layers.Resizing):
    """Image resizing layer with a user-configurable `antialias` flag.

    Identical to `keras.layers.Resizing` except that `antialias` is accepted
    by the constructor, passed to the backend resize call, and stored in the
    layer config.

    Args:
        height: Target height of the resized images.
        width: Target width of the resized images.
        interpolation: Interpolation method, forwarded to the parent layer.
            Defaults to `"bilinear"`.
        antialias: Converted with `bool()` and passed to the backend resize
            call. Defaults to `False`.
        crop_to_aspect_ratio: Forwarded to the parent layer.
        pad_to_aspect_ratio: Forwarded to the parent layer.
        fill_mode: Forwarded to the parent layer.
        fill_value: Forwarded to the parent layer.
        data_format: Forwarded to the parent layer.
        **kwargs: Extra keyword arguments for the parent layer.
    """

    def __init__(
        self,
        height,
        width,
        interpolation="bilinear",
        antialias=False,
        crop_to_aspect_ratio=False,
        pad_to_aspect_ratio=False,
        fill_mode="constant",
        fill_value=0.0,
        data_format=None,
        **kwargs,
    ):
        # Everything except `antialias` is handled by the parent layer.
        parent_options = dict(
            height=height,
            width=width,
            interpolation=interpolation,
            crop_to_aspect_ratio=crop_to_aspect_ratio,
            pad_to_aspect_ratio=pad_to_aspect_ratio,
            fill_mode=fill_mode,
            fill_value=fill_value,
            data_format=data_format,
        )
        super().__init__(**parent_options, **kwargs)
        self.antialias = bool(antialias)

    def transform_images(self, images, transformation=None, training=True):
        """Resize `images` to `(height, width)`, keeping the input dtype."""
        output = self.backend.image.resize(
            images,
            size=(self.height, self.width),
            interpolation=self.interpolation,
            # The one argument added on top of the parent implementation.
            antialias=self.antialias,
            data_format=self.data_format,
            crop_to_aspect_ratio=self.crop_to_aspect_ratio,
            pad_to_aspect_ratio=self.pad_to_aspect_ratio,
            fill_mode=self.fill_mode,
            fill_value=self.fill_value,
        )
        source_dtype = images.dtype
        if output.dtype != source_dtype:
            # Round before casting back to an integer dtype, then clamp so
            # out-of-range values saturate.
            if keras.backend.is_int_dtype(source_dtype):
                output = self.backend.numpy.round(output)
            output = _saturate_cast(output, source_dtype, self.backend)
        return output

    def get_config(self):
        """Return the parent config extended with `antialias`."""
        return {**super().get_config(), "antialias": self.antialias}


@keras_hub_export("keras_hub.layers.ImageConverter")
class ImageConverter(PreprocessingLayer):
"""Preprocess raw images into model ready inputs.
Expand Down Expand Up @@ -65,6 +155,8 @@ class ImageConverter(PreprocessingLayer):
interpolation: String, the interpolation method.
Supports `"bilinear"`, `"nearest"`, `"bicubic"`,
`"lanczos3"`, `"lanczos5"`. Defaults to `"bilinear"`.
antialias: Whether to use an antialiasing filter when downsampling an
image. Defaults to `False`.
bounding_box_format: A string specifying the format of the bounding
boxes, one of `"xyxy"`, `"rel_xyxy"`, `"xywh"`, `"center_xywh"`,
`"yxyx"`, `"rel_yxyx"`. Specifies the format of the bounding boxes
Expand Down Expand Up @@ -107,6 +199,7 @@ def __init__(
crop_to_aspect_ratio=True,
pad_to_aspect_ratio=False,
interpolation="bilinear",
antialias=False,
bounding_box_format="yxyx",
data_format=None,
**kwargs,
Expand All @@ -132,12 +225,13 @@ def __init__(
resizing_kwargs = {}
if check_bounding_box_support():
resizing_kwargs["bounding_box_format"] = bounding_box_format
self.resizing = keras.layers.Resizing(
self.resizing = ResizingAntialiasConfigurable(
height=image_size[0] if image_size else None,
width=image_size[1] if image_size else None,
crop_to_aspect_ratio=crop_to_aspect_ratio,
pad_to_aspect_ratio=pad_to_aspect_ratio,
interpolation=interpolation,
antialias=antialias,
data_format=data_format,
dtype=self.dtype_policy,
name="resizing",
Expand All @@ -148,6 +242,7 @@ def __init__(
self.crop_to_aspect_ratio = crop_to_aspect_ratio
self.pad_to_aspect_ratio = pad_to_aspect_ratio
self.interpolation = interpolation
self.antialias = antialias
self.bounding_box_format = bounding_box_format
self.data_format = standardize_data_format(data_format)

Expand Down Expand Up @@ -211,6 +306,7 @@ def get_config(self):
"scale": self.scale,
"offset": self.offset,
"interpolation": self.interpolation,
"antialias": self.antialias,
"crop_to_aspect_ratio": self.crop_to_aspect_ratio,
"pad_to_aspect_ratio": self.pad_to_aspect_ratio,
"bounding_box_format": self.bounding_box_format,
Expand Down
6 changes: 5 additions & 1 deletion keras_hub/src/models/pali_gemma/pali_gemma_vit.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,11 @@ def compute_output_shape(self, inputs_shape):
# `compute_output_spec` fails to propagate `inputs_shape`
# correctly, causing it to be `None`.
inputs_shape = [None, None, None]
return [inputs_shape[0], inputs_shape[1], self.hidden_dim]
return [
inputs_shape[0],
(inputs_shape[1] // self.patch_size) ** 2,
self.hidden_dim,
]

def get_config(self):
config = super().get_config()
Expand Down
5 changes: 5 additions & 0 deletions keras_hub/src/models/siglip/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from keras_hub.src.models.siglip.siglip_backbone import SigLIPBackbone
from keras_hub.src.models.siglip.siglip_presets import backbone_presets
from keras_hub.src.utils.preset_utils import register_presets

# Register the SigLIP preset table against `SigLIPBackbone`; this runs as a
# side effect of importing the package.
register_presets(backbone_presets, SigLIPBackbone)
Loading
Loading