Skip to content

Commit

Permalink
chg ! refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
vitali-yanushchyk-valor committed Jan 27, 2025
1 parent 3bebe7b commit ce25f45
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 103 deletions.
77 changes: 1 addition & 76 deletions src/hope_dedup_engine/apps/api/models/deduplication.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from itertools import chain
from typing import Any, Final, override
from uuid import uuid4

Expand All @@ -7,18 +6,7 @@
from django.db import models, transaction

from hope_dedup_engine.apps.security.models import ExternalSystem
from hope_dedup_engine.types import (
Embedding,
EntityEmbedding,
EntityEmbeddingError,
Filename,
Finding,
IgnoredPair,
ImageEmbedding,
ImageEmbeddingError,
Score,
SortedTuple,
)
from hope_dedup_engine.types import ImageEmbedding, ImageEmbeddingError

REFERENCE_PK_LENGTH: Final[int] = 100

Expand Down Expand Up @@ -79,31 +67,6 @@ class State(models.IntegerChoices):
def __str__(self) -> str:
    """Return the instance's name, or ``ID: <pk>`` when the name is falsy."""
    label = self.name
    if label:
        return label
    return f"ID: {self.pk}"

def get_encodings(self) -> dict[Filename, Embedding]:
    """Return the ``encodings`` attribute of this instance unchanged."""
    # No copy is made: the caller receives the very object stored on ``self``.
    return self.encodings

def get_findings(self) -> list[Finding]:
    """Return the related findings as a list of value rows.

    Each row holds ``first_reference_pk``, ``second_reference_pk`` and
    ``score``, in that order.
    """
    columns = ("first_reference_pk", "second_reference_pk", "score")
    rows = self.finding_set.values_list(*columns)
    return list(rows)

def get_ignored_pairs(self) -> set[IgnoredPair]:
    """Return all ignored pairs of this instance as a set of ``SortedTuple``.

    Pairs come from two related sets: ignored reference-PK pairs and ignored
    filename pairs. Every ``(first, second)`` row is wrapped in ``SortedTuple``
    before being added to the result.
    """
    reference_pk_pairs = self.ignoredreferencepkpair_set.values_list(
        "first", "second"
    )
    # The filename pairs are materialised up front, as in the original code.
    filename_pairs = list(
        self.ignoredfilenamepair_set.values_list("first", "second")
    )
    return {
        SortedTuple(pair) for pair in chain(reference_pk_pairs, filename_pairs)
    }

@transaction.atomic
def update_encodings(self, encodings: list[ImageEmbedding]) -> None:
with transaction.atomic():
fresh_self: DeduplicationSet = (
Expand All @@ -112,7 +75,6 @@ def update_encodings(self, encodings: list[ImageEmbedding]) -> None:
fresh_self.encodings.update(encodings)
fresh_self.save()

@transaction.atomic
def update_encoding_errors(self, errors: list[ImageEmbeddingError]) -> None:
with transaction.atomic():
fresh_self: DeduplicationSet = (
Expand All @@ -121,43 +83,6 @@ def update_encoding_errors(self, errors: list[ImageEmbeddingError]) -> None:
fresh_self.encoding_errors.update(errors)
fresh_self.save()

def update_findings(
    self, findings: list[tuple[EntityEmbedding, EntityEmbedding, Score]]
) -> None:
    """Bulk-create ``Finding`` rows for this deduplication set.

    Each item of ``findings`` is unpacked as ``(first_filename,
    second_filename, score)``. Reference PKs are looked up by filename among
    the images of this set; a filename with no matching image gives ``None``,
    and the empty filename is mapped to an empty reference PK. The rows are
    inserted with ``ignore_conflicts=True``.
    """
    filename_to_reference_pk = {}
    image_rows = Image.objects.filter(deduplication_set=self).values(
        "filename", "reference_pk"
    )
    for row in image_rows:
        filename_to_reference_pk[row["filename"]] = row["reference_pk"]
    # The empty filename always resolves to an empty reference PK.
    filename_to_reference_pk[""] = ""

    pending = []
    for first_filename, second_filename, score in findings:
        pending.append(
            Finding(
                deduplication_set=self,
                first_reference_pk=filename_to_reference_pk.get(first_filename),
                first_filename=first_filename,
                second_reference_pk=filename_to_reference_pk.get(second_filename),
                second_filename=second_filename,
                score=score,
            )
        )
    Finding.objects.bulk_create(pending, ignore_conflicts=True)

def update_finding_errors(
    self, encoding_errors: list[EntityEmbeddingError]
) -> None:
    """Bulk-create ``Finding`` rows that record encoding errors.

    Each item of ``encoding_errors`` is unpacked as ``(reference_pk,
    filename, error)``; ``error`` is used as a key into
    ``Finding.StatusCode`` and the member's ``value`` is stored as the
    status code. The rows are inserted with ``ignore_conflicts=True``.
    """
    pending = []
    for reference_pk, filename, error in encoding_errors:
        status_code = Finding.StatusCode[error].value
        pending.append(
            Finding(
                deduplication_set=self,
                first_reference_pk=reference_pk,
                first_filename=filename,
                status_code=status_code,
            )
        )
    Finding.objects.bulk_create(pending, ignore_conflicts=True)


class Image(models.Model):
"""
Expand Down
12 changes: 6 additions & 6 deletions src/hope_dedup_engine/apps/faces/celery/tasks/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
from hope_dedup_engine.apps.faces.services.facial import (
encode_faces,
find_similar_faces,
get_ignored_pairs,
update_finding_errors,
update_findings,
)
from hope_dedup_engine.config.celery import app

# from hope_dedup_engine.constants import FacialError
from hope_dedup_engine.types import EntityEmbedding, Filename, SortedTuple
from hope_dedup_engine.utils import compact_pairs
from hope_dedup_engine.utils.celery.task_result import wrapped
Expand Down Expand Up @@ -46,7 +47,7 @@ def filter_ignored_pairs(
embedding_pairs: Iterable[tuple[EntityEmbedding, EntityEmbedding]],
deduplication_set: DeduplicationSet,
) -> Generator[tuple[EntityEmbedding, EntityEmbedding], None, None]:
ignored_pairs = deduplication_set.get_ignored_pairs()
ignored_pairs = get_ignored_pairs(deduplication_set)
for embedding_pair in embedding_pairs:
first, second = embedding_pair
first_reference_pk, _ = first
Expand Down Expand Up @@ -87,7 +88,7 @@ def find_duplicates(
dedupe_threshold=deduplicate_config.get("threshold"),
options=deduplicate_config,
)
deduplication_set.update_findings(findings)
update_findings(deduplication_set, findings)


@app.task
Expand All @@ -97,11 +98,10 @@ def save_encoding_errors_in_findings(deduplication_set_id: str) -> None:
pk=deduplication_set_id
)
embedding_errors = [
# (reference_pk, FacialError(deduplication_set.encoding_errors[filename]))
(reference_pk, filename, deduplication_set.encoding_errors[filename])
for reference_pk, filename in deduplication_set.image_set.values_list(
"reference_pk", "filename"
)
if filename in deduplication_set.encoding_errors
]
deduplication_set.update_finding_errors(embedding_errors)
update_finding_errors(deduplication_set, embedding_errors)
80 changes: 76 additions & 4 deletions src/hope_dedup_engine/apps/faces/services/facial.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
import logging
from collections.abc import Generator, Iterable
from itertools import chain
from typing import Any, cast

# from hope_dedup_engine.types import EncodingType, FindingType, IgnoredPairType
from django.db import transaction

from deepface import DeepFace

from hope_dedup_engine.apps.api.models import Finding
from hope_dedup_engine.apps.api.models import DeduplicationSet, Finding, Image
from hope_dedup_engine.apps.faces.managers import ImagesStorageManager

# from hope_dedup_engine.constants import FacialError
from hope_dedup_engine.types import (
Embedding,
EntityEmbedding,
EntityEmbeddingError,
EntityIgnoredPair,
Filename,
ImageEmbedding,
ImageEmbeddingError,
Score,
SortedTuple,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -71,3 +75,71 @@ def find_similar_faces(
similarity = face_similarity(first_embedding, second_embedding, **options)
if similarity >= dedupe_threshold:
yield first_filename, second_filename, similarity


def update_findings(
    deduplication_set: DeduplicationSet,
    findings: list[tuple[EntityEmbedding, EntityEmbedding, Score]],
) -> None:
    """Create ``Finding`` rows for the given deduplication set.

    Each item of ``findings`` is unpacked as ``(first_filename,
    second_filename, score)``. Reference PKs are looked up by filename among
    the images of ``deduplication_set``; a filename with no matching image
    gives ``None``. The built objects are handed to ``bulk_create_findings``.
    """
    image_rows = Image.objects.filter(
        deduplication_set=deduplication_set
    ).values_list("filename", "reference_pk")
    reference_pk_by_filename = dict(image_rows)

    pending = []
    for first_filename, second_filename, score in findings:
        pending.append(
            Finding(
                deduplication_set=deduplication_set,
                first_reference_pk=reference_pk_by_filename.get(first_filename),
                first_filename=first_filename,
                second_reference_pk=reference_pk_by_filename.get(second_filename),
                second_filename=second_filename,
                score=score,
            )
        )
    bulk_create_findings(pending)


def update_finding_errors(
    deduplication_set: DeduplicationSet, encoding_errors: list[EntityEmbeddingError]
) -> None:
    """Record encoding errors as ``Finding`` rows of the deduplication set.

    Each item of ``encoding_errors`` is unpacked as ``(reference_pk,
    filename, error)``. ``error`` is used as a key into
    ``Finding.StatusCode`` and the member's ``value`` is stored as the
    status code. The built objects are handed to ``bulk_create_findings``.
    """
    errors_to_create = (
        Finding(
            deduplication_set=deduplication_set,
            first_reference_pk=reference_pk,
            first_filename=filename,
            status_code=Finding.StatusCode[error].value,
        )
        for reference_pk, filename, error in encoding_errors
    )
    bulk_create_findings(errors_to_create)


def bulk_create_findings(findings: Iterable[Finding]) -> None:
    """Insert the given ``Finding`` objects in one bulk operation.

    The iterable is materialised first; when it is empty nothing is written.
    Otherwise the objects are bulk-created inside ``transaction.atomic()``
    with ``ignore_conflicts=True`` and the number of objects passed in is
    logged.
    """
    records = list(findings)
    if not records:
        return
    with transaction.atomic():
        Finding.objects.bulk_create(records, ignore_conflicts=True)
    logger.info(f"Created {len(records)} findings.")


def get_ignored_pairs(deduplication_set: DeduplicationSet) -> set[EntityIgnoredPair]:
    """Return all ignored pairs of the deduplication set as ``SortedTuple``s.

    Pairs come from two related sets: ignored reference-PK pairs and ignored
    filename pairs. Every ``(first, second)`` row is wrapped in ``SortedTuple``
    before being added to the resulting set.
    """
    reference_pk_pairs = deduplication_set.ignoredreferencepkpair_set.values_list(
        "first", "second"
    )
    # Iterated directly: copying the rows into a list first is unnecessary.
    filename_pairs = deduplication_set.ignoredfilenamepair_set.values_list(
        "first", "second"
    )
    return {
        SortedTuple(pair) for pair in chain(reference_pk_pairs, filename_pairs)
    }
25 changes: 8 additions & 17 deletions src/hope_dedup_engine/types.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,28 @@
from collections.abc import Iterable
from enum import Enum
from typing import Self
from typing import TYPE_CHECKING, Self

# from hope_dedup_engine.constants import FacialError
# TODO:
# from hope_dedup_engine.apps.api.models.deduplication import Finding as FindingModel
if TYPE_CHECKING:
from hope_dedup_engine.apps.api.models.deduplication import ( # noqa: F401
Finding as FindingModel,
)

# .Image import StatusCode

type ReferencePK = str
type Filename = str
type Embedding = list[float]
type Score = float


class FacialError(Enum):
    """Facial error kinds, each mapped to a distinct integer code."""

    # Codes count down from 999, one per error kind.
    GENERIC_ERROR = 999
    NO_FACE_DETECTED = 998
    MULTIPLE_FACES_DETECTED = 997
    NO_FILE_FOUND = 996


EntityImage = tuple[ReferencePK, Filename]
EntityEmbedding = tuple[ReferencePK, Embedding]
EntityEmbeddingError = tuple[ReferencePK, FacialError]
EntityEmbeddingError = tuple[ReferencePK, "FindingModel.StatusCode"]
ImageEmbedding = tuple[Filename, Embedding]
ImageEmbeddingError = tuple[Filename, FacialError]
Finding = tuple[ReferencePK, ReferencePK, Score]
ImageEmbeddingError = tuple[Filename, "FindingModel.StatusCode"]


class SortedTuple(tuple):
def __new__(cls, iterable: Iterable) -> Self:
return tuple.__new__(cls, sorted(iterable))


IgnoredPair = SortedTuple[ReferencePK, ReferencePK]
EntityIgnoredPair = SortedTuple[ReferencePK, ReferencePK]

0 comments on commit ce25f45

Please sign in to comment.