
Commit dc65d23

Merge pull request #613 from roboflow/dataset_upload_resolution
Dataset Upload Block - Changed the default for upload max_image_size
2 parents 547384a + 2eae56c commit dc65d23
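The headline change: the Dataset Upload block now ships with max_image_size defaulting to (1920, 1080), and larger images are downsized preserving aspect ratio before registration. A minimal sketch of that resizing contract (illustrative only; the helper name and cv2 usage are assumptions, not the block's actual implementation):

# Sketch of the "downsize preserving aspect ratio" behaviour that
# max_image_size describes; NOT the block's actual implementation.
import cv2
import numpy as np

def downscale_to_fit(image: np.ndarray, max_size: tuple) -> np.ndarray:
    max_width, max_height = max_size  # manifest format: (width, height)
    height, width = image.shape[:2]
    scale = min(max_width / width, max_height / height, 1.0)  # never upscale
    if scale >= 1.0:
        return image
    return cv2.resize(image, (int(width * scale), int(height * scale)))

frame = np.zeros((2160, 3840, 3), dtype=np.uint8)  # 4K frame, (h, w, c)
assert downscale_to_fit(frame, (1920, 1080)).shape[:2] == (1080, 1920)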

File tree

8 files changed: +895 -1

inference/core/version.py
+1 -1

@@ -1,4 +1,4 @@
-__version__ = "0.16.4"
+__version__ = "0.17.0"


 if __name__ == "__main__":

inference/core/workflows/core_steps/loader.py
+4

@@ -105,6 +105,9 @@
 from inference.core.workflows.core_steps.sinks.roboflow.dataset_upload.v1 import (
     RoboflowDatasetUploadBlockV1,
 )
+from inference.core.workflows.core_steps.sinks.roboflow.dataset_upload.v2 import (
+    RoboflowDatasetUploadBlockV2,
+)
 from inference.core.workflows.core_steps.transformations.absolute_static_crop.v1 import (
     AbsoluteStaticCropBlockV1,
 )
@@ -291,6 +294,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]:
     ImageContoursDetectionBlockV1,
     ClipComparisonBlockV2,
     CameraFocusBlockV1,
+    RoboflowDatasetUploadBlockV2,
 ]

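With the import in place and the class appended to the list returned by load_blocks(), the v2 block becomes discoverable by the execution engine alongside its v1 counterpart. A quick sanity check (assuming an environment with the inference package installed):

from inference.core.workflows.core_steps.loader import load_blocks
from inference.core.workflows.core_steps.sinks.roboflow.dataset_upload.v2 import (
    RoboflowDatasetUploadBlockV2,
)

# load_blocks() returns the list of registered block classes,
# so the new block should now be present in it.
assert RoboflowDatasetUploadBlockV2 in load_blocks()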
inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v1.py
+17

@@ -1,3 +1,20 @@
+"""
+*****************************************************************
+*                           WARNING!                            *
+*****************************************************************
+This module contains the utility functions used by
+RoboflowDatasetUploadBlockV2.
+
+We do not recommend making multiple blocks dependent on the same code,
+but the change between v1 and v2 was basically the default value of
+some parameter - hence we decided not to replicate the code.
+
+If you need to modify this module, beware that you may introduce
+a change to RoboflowDatasetUploadBlockV2! If that happens, that is
+probably the time to disentangle those blocks and copy the
+code.
+"""
+
 import hashlib
 import json
 import logging

inference/core/workflows/core_steps/sinks/roboflow/dataset_upload/v2.py
+322 (new file)

@@ -0,0 +1,322 @@
import random
from concurrent.futures import ThreadPoolExecutor
from typing import List, Literal, Optional, Tuple, Type, Union

import supervision as sv
from fastapi import BackgroundTasks
from pydantic import ConfigDict, Field
from typing_extensions import Annotated

from inference.core.cache.base import BaseCache
from inference.core.workflows.core_steps.sinks.roboflow.dataset_upload.v1 import (
    register_datapoint_at_roboflow,
)
from inference.core.workflows.execution_engine.entities.base import (
    Batch,
    OutputDefinition,
    WorkflowImageData,
)
from inference.core.workflows.execution_engine.entities.types import (
    BATCH_OF_BOOLEAN_KIND,
    BATCH_OF_CLASSIFICATION_PREDICTION_KIND,
    BATCH_OF_INSTANCE_SEGMENTATION_PREDICTION_KIND,
    BATCH_OF_KEYPOINT_DETECTION_PREDICTION_KIND,
    BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
    BATCH_OF_STRING_KIND,
    BOOLEAN_KIND,
    FLOAT_KIND,
    ROBOFLOW_PROJECT_KIND,
    STRING_KIND,
    ImageInputField,
    StepOutputImageSelector,
    StepOutputSelector,
    WorkflowImageSelector,
    WorkflowParameterSelector,
)
from inference.core.workflows.prototypes.block import (
    BlockResult,
    WorkflowBlock,
    WorkflowBlockManifest,
)

FloatZeroToHundred = Annotated[float, Field(ge=0.0, le=100.0)]

SHORT_DESCRIPTION = "Save images and predictions in your Roboflow Dataset"

LONG_DESCRIPTION = """
Block lets users save their images and predictions into a Roboflow dataset. Persisting data from
production environments helps in iteratively building more robust models.

Block provides configuration options to decide how data should be stored and what limits should
be applied. We advise using this block in combination with rate limiter blocks to effectively
collect data that the model struggles with.
"""

WORKSPACE_NAME_CACHE_EXPIRE = 900  # 15 min
TIMESTAMP_FORMAT = "%Y_%m_%d"
DUPLICATED_STATUS = "Duplicated image"
BatchCreationFrequency = Literal["never", "daily", "weekly", "monthly"]


class BlockManifest(WorkflowBlockManifest):
    model_config = ConfigDict(
        json_schema_extra={
            "name": "Roboflow Dataset Upload",
            "version": "v2",
            "short_description": SHORT_DESCRIPTION,
            "long_description": LONG_DESCRIPTION,
            "license": "Apache-2.0",
            "block_type": "sink",
        }
    )
    type: Literal["roboflow_core/roboflow_dataset_upload@v2"]
    images: Union[WorkflowImageSelector, StepOutputImageSelector] = ImageInputField
    predictions: Optional[
        StepOutputSelector(
            kind=[
                BATCH_OF_OBJECT_DETECTION_PREDICTION_KIND,
                BATCH_OF_INSTANCE_SEGMENTATION_PREDICTION_KIND,
                BATCH_OF_KEYPOINT_DETECTION_PREDICTION_KIND,
                BATCH_OF_CLASSIFICATION_PREDICTION_KIND,
            ]
        )
    ] = Field(
        default=None,
        description="Reference to detection-like predictions",
        examples=["$steps.object_detection_model.predictions"],
    )
    target_project: Union[
        WorkflowParameterSelector(kind=[ROBOFLOW_PROJECT_KIND]), str
    ] = Field(
        description="Name of the Roboflow dataset / project to be used as a target for collected data",
        examples=["my_dataset", "$inputs.target_al_dataset"],
    )
    usage_quota_name: str = Field(
        description="Unique name for the Roboflow project pointed to by the `target_project` "
        "parameter, identifying the usage quota applied for this block.",
        examples=["quota-for-data-sampling-1"],
    )
    data_percentage: Union[
        FloatZeroToHundred, WorkflowParameterSelector(kind=[FLOAT_KIND])
    ] = Field(
        description="Percent of data that will be saved (in range [0.0, 100.0])",
        examples=[100.0, 50.0],
    )
    persist_predictions: Union[bool, WorkflowParameterSelector(kind=[BOOLEAN_KIND])] = (
        Field(
            default=True,
            description="Boolean flag to decide if predictions should be registered along with images",
            examples=[True, False, "$inputs.persist_predictions"],
        )
    )
    minutely_usage_limit: int = Field(
        default=10,
        description="Maximum number of data registration requests per minute accounted in the scope of "
        "a single server or the whole Roboflow platform, depending on the context of usage.",
        examples=[10, 60],
    )
    hourly_usage_limit: int = Field(
        default=100,
        description="Maximum number of data registration requests per hour accounted in the scope of "
        "a single server or the whole Roboflow platform, depending on the context of usage.",
        examples=[10, 60],
    )
    daily_usage_limit: int = Field(
        default=1000,
        description="Maximum number of data registration requests per day accounted in the scope of "
        "a single server or the whole Roboflow platform, depending on the context of usage.",
        examples=[10, 60],
    )
    max_image_size: Tuple[int, int] = Field(
        default=(1920, 1080),
        description="Maximum size of the image to be registered - bigger images will be "
        "downsized preserving aspect ratio. Format of data: `(width, height)`",
        examples=[(1920, 1080), (512, 512)],
    )
    compression_level: int = Field(
        default=95,
        gt=0,
        le=100,
        description="Compression level for images registered",
        examples=[95, 75],
    )
    registration_tags: List[
        Union[WorkflowParameterSelector(kind=[STRING_KIND]), str]
    ] = Field(
        default_factory=list,
        description="Tags to be attached to registered datapoints",
        examples=[["location-florida", "factory-name", "$inputs.dynamic_tag"]],
    )
    disable_sink: Union[bool, WorkflowParameterSelector(kind=[BOOLEAN_KIND])] = Field(
        default=False,
        description="Boolean flag that can also be a reference to an input - to arbitrarily "
        "disable data collection for a specific request",
        examples=[True, "$inputs.disable_active_learning"],
    )
    fire_and_forget: Union[bool, WorkflowParameterSelector(kind=[BOOLEAN_KIND])] = (
        Field(
            default=True,
            description="Boolean flag dictating if the sink is supposed to be executed in the background, "
            "not waiting on the status of registration before the end of the workflow run. Use `True` if "
            "best-effort registration is needed; use `False` while debugging and if error handling is needed",
        )
    )
    labeling_batch_prefix: Union[str, WorkflowParameterSelector(kind=[STRING_KIND])] = (
        Field(
            default="workflows_data_collector",
            description="Prefix of the name for labeling batches that will be registered in the Roboflow app",
            examples=["my_labeling_batch_name"],
        )
    )
    labeling_batches_recreation_frequency: BatchCreationFrequency = Field(
        default="never",
        description="Frequency at which new labeling batches are created in the Roboflow app. New batches "
        "are created with the name prefix provided in `labeling_batch_prefix` at given time intervals. "
        "Useful in organising the labeling flow.",
        examples=["never", "daily"],
    )

    @classmethod
    def accepts_batch_input(cls) -> bool:
        return True

    @classmethod
    def describe_outputs(cls) -> List[OutputDefinition]:
        return [
            OutputDefinition(name="error_status", kind=[BATCH_OF_BOOLEAN_KIND]),
            OutputDefinition(name="message", kind=[BATCH_OF_STRING_KIND]),
        ]

    @classmethod
    def get_execution_engine_compatibility(cls) -> Optional[str]:
        return ">=1.0.0,<2.0.0"


class RoboflowDatasetUploadBlockV2(WorkflowBlock):

    def __init__(
        self,
        cache: BaseCache,
        api_key: Optional[str],
        background_tasks: Optional[BackgroundTasks],
        thread_pool_executor: Optional[ThreadPoolExecutor],
    ):
        self._cache = cache
        self._api_key = api_key
        self._background_tasks = background_tasks
        self._thread_pool_executor = thread_pool_executor

    @classmethod
    def get_init_parameters(cls) -> List[str]:
        return ["cache", "api_key", "background_tasks", "thread_pool_executor"]

    @classmethod
    def get_manifest(cls) -> Type[WorkflowBlockManifest]:
        return BlockManifest

    def run(
        self,
        images: Batch[WorkflowImageData],
        predictions: Optional[Batch[Union[sv.Detections, dict]]],
        target_project: str,
        usage_quota_name: str,
        data_percentage: float,
        minutely_usage_limit: int,
        persist_predictions: bool,
        hourly_usage_limit: int,
        daily_usage_limit: int,
        max_image_size: Tuple[int, int],
        compression_level: int,
        registration_tags: List[str],
        disable_sink: bool,
        fire_and_forget: bool,
        labeling_batch_prefix: str,
        labeling_batches_recreation_frequency: BatchCreationFrequency,
    ) -> BlockResult:
        if self._api_key is None:
            raise ValueError(
                "RoboflowDataCollector block cannot run without a Roboflow API key. "
                "If you do not know how to get an API key - visit "
                "https://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key to learn how to "
                "retrieve one."
            )
        if disable_sink:
            return [
                {
                    "error_status": False,
                    "message": "Sink was disabled by parameter `disable_sink`",
                }
                for _ in range(len(images))
            ]
        result = []
        predictions = [None] * len(images) if predictions is None else predictions
        for image, prediction in zip(images, predictions):
            error_status, message = maybe_register_datapoint_at_roboflow(
                image=image,
                prediction=prediction,
                target_project=target_project,
                usage_quota_name=usage_quota_name,
                data_percentage=data_percentage,
                persist_predictions=persist_predictions,
                minutely_usage_limit=minutely_usage_limit,
                hourly_usage_limit=hourly_usage_limit,
                daily_usage_limit=daily_usage_limit,
                max_image_size=max_image_size,
                compression_level=compression_level,
                registration_tags=registration_tags,
                fire_and_forget=fire_and_forget,
                labeling_batch_prefix=labeling_batch_prefix,
                new_labeling_batch_frequency=labeling_batches_recreation_frequency,
                cache=self._cache,
                background_tasks=self._background_tasks,
                thread_pool_executor=self._thread_pool_executor,
                api_key=self._api_key,
            )
            result.append({"error_status": error_status, "message": message})
        return result


def maybe_register_datapoint_at_roboflow(
    image: WorkflowImageData,
    prediction: Optional[Union[sv.Detections, dict]],
    target_project: str,
    usage_quota_name: str,
    data_percentage: float,
    persist_predictions: bool,
    minutely_usage_limit: int,
    hourly_usage_limit: int,
    daily_usage_limit: int,
    max_image_size: Tuple[int, int],
    compression_level: int,
    registration_tags: List[str],
    fire_and_forget: bool,
    labeling_batch_prefix: str,
    new_labeling_batch_frequency: BatchCreationFrequency,
    cache: BaseCache,
    background_tasks: Optional[BackgroundTasks],
    thread_pool_executor: Optional[ThreadPoolExecutor],
    api_key: str,
) -> Tuple[bool, str]:
    normalised_probability = data_percentage / 100
    if random.random() < normalised_probability:
        return register_datapoint_at_roboflow(
            image=image,
            prediction=prediction,
            target_project=target_project,
            usage_quota_name=usage_quota_name,
            persist_predictions=persist_predictions,
            minutely_usage_limit=minutely_usage_limit,
            hourly_usage_limit=hourly_usage_limit,
            daily_usage_limit=daily_usage_limit,
            max_image_size=max_image_size,
            compression_level=compression_level,
            registration_tags=registration_tags,
            fire_and_forget=fire_and_forget,
            labeling_batch_prefix=labeling_batch_prefix,
            new_labeling_batch_frequency=new_labeling_batch_frequency,
            cache=cache,
            background_tasks=background_tasks,
            thread_pool_executor=thread_pool_executor,
            api_key=api_key,
        )
    return False, "Registration skipped due to sampling settings"
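
Since the block's whole interface is declared by the manifest above, using it comes down to adding a step of type roboflow_core/roboflow_dataset_upload@v2 to a workflow definition. A sketch of such a step (values are illustrative; only the type literal, field names, and defaults come from the manifest):

# Hypothetical step definition for the v2 block; selector values are examples.
dataset_upload_step = {
    "type": "roboflow_core/roboflow_dataset_upload@v2",
    "name": "data_collection",
    "images": "$inputs.image",
    "predictions": "$steps.object_detection_model.predictions",
    "target_project": "my_dataset",
    "usage_quota_name": "quota-for-data-sampling-1",
    "data_percentage": 5.0,
    # max_image_size omitted on purpose, so the new (1920, 1080) default applies
}

Note how data_percentage feeds maybe_register_datapoint_at_roboflow: with 5.0 the normalised probability is 0.05, so roughly 1 in 20 images reaches register_datapoint_at_roboflow, while the rest short-circuit with (False, "Registration skipped due to sampling settings").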

0 commit comments
