Florence2 workflows block #661

Open · wants to merge 16 commits into base: main
82 changes: 82 additions & 0 deletions docker/dockerfiles/Dockerfile.onnx.gpu.dev
@@ -0,0 +1,82 @@
FROM nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS base

WORKDIR /app

RUN rm -rf /var/lib/apt/lists/* && apt-get clean && apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y \
ffmpeg \
libxext6 \
libopencv-dev \
uvicorn \
python3-pip \
git \
libgdal-dev \
wget \
&& rm -rf /var/lib/apt/lists/*

COPY requirements/requirements.sam.txt \
requirements/requirements.clip.txt \
requirements/requirements.http.txt \
requirements/requirements.gpu.txt \
requirements/requirements.waf.txt \
requirements/requirements.gaze.txt \
requirements/requirements.doctr.txt \
requirements/requirements.groundingdino.txt \
requirements/requirements.cogvlm.txt \
requirements/requirements.yolo_world.txt \
requirements/_requirements.txt \
requirements/requirements.transformers.txt \
requirements/requirements.pali.flash_attn.txt \
requirements/requirements.sdk.http.txt \
requirements/requirements.cli.txt \
./

RUN python3 -m pip install -U pip
RUN python3 -m pip install --extra-index-url https://download.pytorch.org/whl/cu118 \
-r _requirements.txt \
-r requirements.sam.txt \
-r requirements.clip.txt \
-r requirements.http.txt \
-r requirements.gpu.txt \
-r requirements.waf.txt \
-r requirements.gaze.txt \
-r requirements.groundingdino.txt \
-r requirements.doctr.txt \
-r requirements.cogvlm.txt \
-r requirements.yolo_world.txt \
-r requirements.transformers.txt \
-r requirements.sdk.http.txt \
-r requirements.cli.txt \
jupyterlab \
--upgrade \
&& rm -rf ~/.cache/pip

# Install setup.py requirements for flash_attn
RUN python3 -m pip install packaging==24.1 && rm -rf ~/.cache/pip

# Install flash_attn required for Paligemma and Florence2
RUN python3 -m pip install -r requirements.pali.flash_attn.txt --no-build-isolation && rm -rf ~/.cache/pip

# Flatten the build stage into a single layer
FROM scratch
COPY --from=base / /

WORKDIR /app/
COPY inference inference
COPY inference_sdk inference_sdk
COPY inference_cli inference_cli
ENV PYTHONPATH=/app/
COPY docker/config/gpu_http.py gpu_http.py

ENV VERSION_CHECK_MODE=continuous
ENV PROJECT=roboflow-platform
ENV NUM_WORKERS=1
ENV HOST=0.0.0.0
ENV PORT=9001
ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV API_LOGGING_ENABLED=True
ENV LMM_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
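
To sanity-check an image built from this Dockerfile, here is a minimal client sketch against the server it starts; the call uses the inference_sdk client API shipped in the image, but the model id is hypothetical:

# Assumes the container was started with the defaults above and published
# on localhost:9001 (e.g. docker run with -p 9001:9001).
from inference_sdk import InferenceHTTPClient

client = InferenceHTTPClient(api_url="http://localhost:9001")
result = client.infer("path/to/image.jpg", model_id="some-project/1")  # hypothetical model id
print(result)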
4 changes: 3 additions & 1 deletion inference/core/entities/responses/inference.py
@@ -291,7 +291,9 @@ class MultiLabelClassificationInferenceResponse(


class LMMInferenceResponse(CvInferenceResponse):
-    response: str = Field(description="Text generated by PaliGemma")
+    response: Union[str, dict] = Field(
+        description="Text/structured response generated by model"
+    )


class FaceDetectionPrediction(ObjectDetectionPrediction):
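
The widened response type matters because Florence-2 grounding tasks return structured payloads rather than plain text. A minimal, self-contained sketch of the same pattern (DemoResponse is illustrative, not the actual class):

from typing import Union

from pydantic import BaseModel, Field

class DemoResponse(BaseModel):
    response: Union[str, dict] = Field(
        description="Text/structured response generated by model"
    )

DemoResponse(response="a plain caption")  # still valid
DemoResponse(response={"bboxes": [[0, 0, 10, 10]], "labels": ["cat"]})  # now also valid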
@@ -1,3 +1,4 @@
+import hashlib
import json
import logging
import re
@@ -90,15 +91,16 @@ class BlockManifest(WorkflowBlockManifest):
    classes: Union[
        WorkflowParameterSelector(kind=[LIST_OF_VALUES_KIND]),
        StepOutputSelector(kind=[LIST_OF_VALUES_KIND]),
-        List[str],
+        Optional[List[str]],
    ] = Field(
        description="List of all classes used by the model, required to "
        "generate mapping between class name and class id.",
        examples=[["$steps.lmm.classes", "$inputs.classes", ["class_a", "class_b"]]],
+        default=None,
    )
-    model_type: Literal["google-gemini", "anthropic-claude"] = Field(
+    model_type: Literal["google-gemini", "anthropic-claude", "florence-2"] = Field(
        description="Type of the model that generated prediction",
-        examples=[["google-gemini", "anthropic-claude"]],
+        examples=[["google-gemini", "anthropic-claude", "florence-2"]],
    )
    task_type: Literal["object-detection"]

@@ -108,6 +110,11 @@ def validate(self) -> "BlockManifest":
            raise ValueError(
                f"Could not parse result of task {self.task_type} for model {self.model_type}"
            )
+        if self.model_type != "florence-2" and self.classes is None:
+            raise ValueError(
+                "Must pass list of classes to this block when using gemini or claude"
+            )
+
        return self

    @classmethod
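
The intent of the new check, reduced to a self-contained sketch (class and field names are illustrative): classes may be omitted only for florence-2, which grounds its own labels, while gemini and claude need the list to build the class-name-to-id mapping.

from typing import List, Optional

from pydantic import BaseModel, model_validator

class ManifestSketch(BaseModel):
    model_type: str
    classes: Optional[List[str]] = None

    @model_validator(mode="after")
    def check_classes(self) -> "ManifestSketch":
        if self.model_type != "florence-2" and self.classes is None:
            raise ValueError("Must pass list of classes when using gemini or claude")
        return self

ManifestSketch(model_type="florence-2")  # OK: classes may stay None
try:
    ManifestSketch(model_type="google-gemini")  # rejected: classes missing
except ValueError as error:
    print(error)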
@@ -135,7 +142,7 @@ def run(
        self,
        image: WorkflowImageData,
        vlm_output: str,
-        classes: List[str],
+        classes: Optional[List[str]],
        model_type: str,
        task_type: str,
    ) -> BlockResult:
@@ -255,7 +262,58 @@ def scale_confidence(value: float) -> float:
    return min(max(float(value), 0.0), 1.0)


def parse_florence2_object_detection_response(
    image: WorkflowImageData,
    parsed_data: dict,
    classes: Optional[List[str]],
    inference_id: str,
):
    image_height, image_width = image.numpy_image.shape[:2]
    detections = sv.Detections.from_lmm(
        "florence_2",
        parsed_data,
        resolution_wh=(image_width, image_height),
    )
    detection_ids = np.array([str(uuid4()) for _ in range(len(detections))])
    inference_ids = np.array([inference_id] * len(detections))
    prediction_type = np.array(["object-detection"] * len(detections))
    detections.data.update(
        {
            INFERENCE_ID_KEY: inference_ids,
            DETECTION_ID_KEY: detection_ids,
            PREDICTION_TYPE_KEY: prediction_type,
        }
    )
    # Florence-2 does not report per-box scores, so confidence defaults to 1.0.
    detections.confidence = np.array([1.0 for _ in detections])
    detected_class_names = detections.data[CLASS_NAME_DATA_FIELD]
    if classes is not None:
        # Keep only detections whose class name appears in the user-provided
        # list, and map each class name to its index in that list.
        bool_array = np.array([c in classes for c in detected_class_names])
        filtered_detections = detections[bool_array]
        filtered_classes = filtered_detections.data[CLASS_NAME_DATA_FIELD]
        filtered_detections.class_id = np.array(
            [classes.index(c) for c in filtered_classes]
        )
        return attach_parents_coordinates_to_sv_detections(
            detections=filtered_detections,
            image=image,
        )
    # classes is None: derive a stable pseudo class id from each class name.
    class_ids = [get_4digit_from_md5(c) for c in detected_class_names]
    detections.class_id = np.array(class_ids)
    return attach_parents_coordinates_to_sv_detections(
        detections=detections, image=image
    )
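
The heavy lifting above is supervision's from_lmm, which gained Florence-2 support in supervision 0.23. A standalone sketch, assuming the raw Florence-2 "<OD>" payload shape (values are illustrative):

import supervision as sv

payload = {"<OD>": {"bboxes": [[10.0, 20.0, 110.0, 220.0]], "labels": ["cat"]}}
detections = sv.Detections.from_lmm(
    "florence_2",
    payload,
    resolution_wh=(640, 480),
)
print(detections.xyxy)  # [[ 10.  20. 110. 220.]]
print(detections.data["class_name"])  # ['cat']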


def get_4digit_from_md5(input_string: str) -> int:
    # Derive a stable 4-digit pseudo class id from the MD5 of the class name,
    # so the same name maps to the same id across runs and processes.
    md5_hash = hashlib.md5(input_string.encode("utf-8"))
    hex_digest = md5_hash.hexdigest()
    integer_value = int(hex_digest[:9], 16)
    return integer_value % 10000
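
These pseudo ids are deterministic across runs and processes (unlike Python's salted hash()), though two distinct names can collide within the 0-9999 range. A quick usage check against the function above:

assert get_4digit_from_md5("car") == get_4digit_from_md5("car")  # deterministic
print(get_4digit_from_md5("car"), get_4digit_from_md5("dog"))  # two 4-digit ids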


REGISTERED_PARSERS = {
    ("google-gemini", "object-detection"): parse_gemini_object_detection_response,
    ("anthropic-claude", "object-detection"): parse_gemini_object_detection_response,
+    ("florence-2", "object-detection"): parse_florence2_object_detection_response,
}
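
Parser dispatch is then a plain dictionary lookup keyed by (model_type, task_type); the validate hook above guarantees the key exists before run is called. A sketch of the lookup (resolve_parser is an illustrative wrapper, not part of this PR):

def resolve_parser(model_type: str, task_type: str):
    try:
        return REGISTERED_PARSERS[(model_type, task_type)]
    except KeyError:
        raise ValueError(f"No parser registered for {(model_type, task_type)!r}")

parser = resolve_parser("florence-2", "object-detection")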
4 changes: 4 additions & 0 deletions inference/core/workflows/core_steps/loader.py
@@ -85,6 +85,9 @@
from inference.core.workflows.core_steps.models.foundation.cog_vlm.v1 import (
    CogVLMBlockV1,
)
+from inference.core.workflows.core_steps.models.foundation.florence2.v1 import (
+    Florence2BlockV1,
+)
from inference.core.workflows.core_steps.models.foundation.google_gemini.v1 import (
    GoogleGeminiBlockV1,
)
@@ -339,6 +342,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]:
        AntropicClaudeBlockV1,
        LineCounterBlockV1,
        PolygonZoneVisualizationBlockV1,
+        Florence2BlockV1,
    ]
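
With the block registered in load_blocks, a workflow definition can reference it by its manifest alias. A hypothetical step specification (the type value and property names are illustrative, not taken from this PR):

florence_step = {
    "type": "roboflow_core/florence_2@v1",  # illustrative alias; check the block manifest
    "name": "florence_2",
    "images": "$inputs.image",
    "task_type": "object-detection",
}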

