diff --git a/requirements.in b/requirements.in index 9f737e5fd..65989ba48 100644 --- a/requirements.in +++ b/requirements.in @@ -52,6 +52,7 @@ sentry-sdk>=2.13.0 sentry-sdk[celery] SQLAlchemy SQLAlchemy-Utils +sqlparse statsd stripe>=11.4.1 time-machine diff --git a/requirements.txt b/requirements.txt index 34ecf93ad..3093cd85d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -400,7 +400,9 @@ sqlalchemy-utils==0.36.8 # -r requirements.in # pytest-sqlalchemy sqlparse==0.5.0 - # via django + # via + # -r requirements.in + # django statsd==3.3.0 # via -r requirements.in stripe==11.4.1 diff --git a/services/cleanup/__init__.py b/services/cleanup/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/services/cleanup/cleanup.py b/services/cleanup/cleanup.py new file mode 100644 index 000000000..cc350226a --- /dev/null +++ b/services/cleanup/cleanup.py @@ -0,0 +1,52 @@ +import logging + +from django.db.models.query import QuerySet + +from services.cleanup.models import MANUAL_CLEANUP +from services.cleanup.relations import build_relation_graph +from services.cleanup.utils import CleanupContext, CleanupResult, CleanupSummary + +log = logging.getLogger(__name__) + + +def run_cleanup( + query: QuerySet, +) -> CleanupSummary: + """ + Cleans up all the models and storage files reachable from the given `QuerySet`. + + This deletes all database models in topological sort order, and also removes + all the files in storage for any of the models in the relationship graph. + + Returns the number of models and files being cleaned up in total, and per-Model. 
+ """ + context = CleanupContext() + models_to_cleanup = build_relation_graph(query) + + summary = {} + cleaned_models = 0 + cleaned_files = 0 + + for model, query in models_to_cleanup: + manual_cleanup = MANUAL_CLEANUP.get(model) + if manual_cleanup is not None: + result = manual_cleanup(context, query) + else: + result = CleanupResult(query._raw_delete(query.db)) + + if result.cleaned_models > 0 or result.cleaned_files > 0: + summary[model] = result + + log.info( + f"Finished cleaning up {model.__name__}", + extra={ + "cleaned_models": result.cleaned_models, + "cleaned_files": result.cleaned_files, + }, + ) + + cleaned_models += result.cleaned_models + cleaned_files += result.cleaned_files + + totals = CleanupResult(cleaned_models, cleaned_files) + return CleanupSummary(totals, summary) diff --git a/services/cleanup/models.py b/services/cleanup/models.py new file mode 100644 index 000000000..2fbc1848a --- /dev/null +++ b/services/cleanup/models.py @@ -0,0 +1,199 @@ +import dataclasses +import itertools +from collections import defaultdict +from collections.abc import Callable +from concurrent.futures import ThreadPoolExecutor +from functools import partial + +from django.db.models import Model +from django.db.models.query import QuerySet +from shared.bundle_analysis import StoragePaths +from shared.django_apps.compare.models import CommitComparison +from shared.django_apps.core.models import Commit, Pull +from shared.django_apps.profiling.models import ProfilingUpload +from shared.django_apps.reports.models import CommitReport, ReportDetails +from shared.django_apps.reports.models import ReportSession as Upload +from shared.django_apps.staticanalysis.models import StaticAnalysisSingleFileSnapshot + +from services.archive import ArchiveService, MinioEndpoints +from services.cleanup.utils import CleanupContext, CleanupResult + +MANUAL_QUERY_CHUNKSIZE = 5_000 +DELETE_FILES_BATCHSIZE = 50 + + +def cleanup_files_batched( + context: CleanupContext, buckets_paths: 
dict[str, list[str]] +) -> int: + cleaned_files = 0 + + # TODO: maybe reuse the executor across calls? + with ThreadPoolExecutor() as e: + for bucket, paths in buckets_paths.items(): + for batched_paths in itertools.batched(paths, DELETE_FILES_BATCHSIZE): + e.submit(context.storage.delete_files, bucket, list(batched_paths)) + cleaned_files += len(paths) + + return cleaned_files + + +def cleanup_with_storage_field( + path_field: str, + context: CleanupContext, + query: QuerySet, +) -> CleanupResult: + cleaned_files = 0 + + # delete `None` `path_field`s right away + cleaned_models = query.filter(**{f"{path_field}__isnull": True})._raw_delete( + query.db + ) + + # delete all those files from storage, using chunks based on the `id` column + storage_query = query.filter(**{f"{path_field}__isnull": False}).order_by("id") + + while True: + storage_paths = storage_query.values_list(path_field, flat=True)[ + :MANUAL_QUERY_CHUNKSIZE + ] + if len(storage_paths) == 0: + break + + cleaned_files += cleanup_files_batched( + context, {context.default_bucket: storage_paths} + ) + cleaned_models += query.filter( + id__in=storage_query[:MANUAL_QUERY_CHUNKSIZE] + )._raw_delete(query.db) + + return CleanupResult(cleaned_models, cleaned_files) + + +def cleanup_archivefield( + field_name: str, context: CleanupContext, query: QuerySet +) -> CleanupResult: + model_field_name = f"_{field_name}_storage_path" + + return cleanup_with_storage_field(model_field_name, context, query) + + +# This has all the `Repository` fields needed by `get_archive_hash` +@dataclasses.dataclass +class FakeRepository: + repoid: int + service: str + service_id: str + + +def cleanup_commitreport(context: CleanupContext, query: QuerySet) -> CleanupResult: + coverage_reports = query.values_list( + "report_type", + "code", + "external_id", + "commit__commitid", + "commit__repository__repoid", + "commit__repository__author__service", + "commit__repository__service_id", + ).order_by("id") + + cleaned_models = 0 + 
cleaned_files = 0 + repo_hashes: dict[int, str] = {} + + while True: + reports = coverage_reports[:MANUAL_QUERY_CHUNKSIZE] + if len(reports) == 0: + break + + buckets_paths: dict[str, list[str]] = defaultdict(list) + for ( + report_type, + report_code, + external_id, + commit_sha, + repoid, + repo_service, + repo_service_id, + ) in reports: + if repoid not in repo_hashes: + fake_repo = FakeRepository( + repoid=repoid, service=repo_service, service_id=repo_service_id + ) + repo_hashes[repoid] = ArchiveService.get_archive_hash(fake_repo) + repo_hash = repo_hashes[repoid] + + # depending on the `report_type`, we have: + # - a `chunks` file for coverage + # - a `bundle_report.sqlite` for BA + if report_type == "bundle_analysis": + path = StoragePaths.bundle_report.path( + repo_key=repo_hash, report_key=external_id + ) + buckets_paths[context.bundleanalysis_bucket].append(path) + elif report_type == "test_results": + # TA has cached rollups, but those are based on `Branch` + pass + else: + chunks_file_name = report_code if report_code is not None else "chunks" + path = MinioEndpoints.chunks.get_path( + version="v4", + repo_hash=repo_hash, + commitid=commit_sha, + chunks_file_name=chunks_file_name, + ) + buckets_paths[context.default_bucket].append(path) + + cleaned_files += cleanup_files_batched(context, buckets_paths) + cleaned_models += query.filter( + id__in=query.order_by("id")[:MANUAL_QUERY_CHUNKSIZE] + )._raw_delete(query.db) + + return CleanupResult(cleaned_models, cleaned_files) + + +def cleanup_upload(context: CleanupContext, query: QuerySet) -> CleanupResult: + cleaned_files = 0 + + # delete `None` `storage_path`s right away + cleaned_models = query.filter(storage_path__isnull=True)._raw_delete(query.db) + + # delete all those files from storage, using chunks based on the `id` column + storage_query = query.filter(storage_path__isnull=False).order_by("id") + + while True: + uploads = storage_query.values_list("report__report_type", "storage_path")[ + 
:MANUAL_QUERY_CHUNKSIZE + ] + if len(uploads) == 0: + break + + buckets_paths: dict[str, list[str]] = defaultdict(list) + for report_type, storage_path in uploads: + if report_type == "bundle_analysis": + buckets_paths[context.bundleanalysis_bucket].append(storage_path) + else: + buckets_paths[context.default_bucket].append(storage_path) + + cleaned_files += cleanup_files_batched(context, buckets_paths) + cleaned_models += query.filter( + id__in=storage_query[:MANUAL_QUERY_CHUNKSIZE] + )._raw_delete(query.db) + + return CleanupResult(cleaned_models, cleaned_files) + + +# All the models that need custom python code for deletions so a bulk `DELETE` query does not work. +MANUAL_CLEANUP: dict[ + type[Model], Callable[[CleanupContext, QuerySet], CleanupResult] +] = { + Commit: partial(cleanup_archivefield, "report"), + Pull: partial(cleanup_archivefield, "flare"), + ReportDetails: partial(cleanup_archivefield, "files_array"), + CommitReport: cleanup_commitreport, + Upload: cleanup_upload, + CommitComparison: partial(cleanup_with_storage_field, "report_storage_path"), + ProfilingUpload: partial(cleanup_with_storage_field, "raw_upload_location"), + StaticAnalysisSingleFileSnapshot: partial( + cleanup_with_storage_field, "content_location" + ), +} diff --git a/services/cleanup/owner.py b/services/cleanup/owner.py new file mode 100644 index 000000000..e61fcf652 --- /dev/null +++ b/services/cleanup/owner.py @@ -0,0 +1,54 @@ +import logging + +from django.db import transaction +from django.db.models import Q +from shared.django_apps.codecov_auth.models import Owner, OwnerProfile +from shared.django_apps.core.models import Commit, Pull, Repository + +from services.cleanup.cleanup import run_cleanup +from services.cleanup.utils import CleanupSummary + +log = logging.getLogger(__name__) + +CLEAR_ARRAY_FIELDS = ["plan_activated_users", "organizations", "admins"] + + +def cleanup_owner(owner_id: int) -> CleanupSummary: + log.info("Started/Continuing Owner cleanup", 
extra={"owner_id": owner_id}) + + clear_owner_references(owner_id) + owner_query = Owner.objects.filter(ownerid=owner_id) + summary = run_cleanup(owner_query) + + log.info("Owner cleanup finished", extra={"owner_id": owner_id, "summary": summary}) + return summary + + +# TODO: maybe turn this into a `MANUAL_CLEANUP`? +def clear_owner_references(owner_id: int): + """ + This clears the `ownerid` from various DB arrays where it is being referenced. + """ + + OwnerProfile.objects.filter(default_org=owner_id).update(default_org=None) + Owner.objects.filter(bot=owner_id).update(bot=None) + Repository.objects.filter(bot=owner_id).update(bot=None) + Commit.objects.filter(author=owner_id).update(author=None) + Pull.objects.filter(author=owner_id).update(author=None) + + # This uses a transaction / `select_for_update` to ensure consistency when + # modifying these `ArrayField`s in python. + # I don’t think we have such consistency anyplace else in the codebase, so + # if this is causing lock contention issues, its also fair to avoid this. 
+ with transaction.atomic(): + filter = Q() + for field in CLEAR_ARRAY_FIELDS: + filter = filter | Q(**{f"{field}__contains": [owner_id]}) + + owners_with_reference = Owner.objects.select_for_update().filter(filter) + for owner in owners_with_reference: + for field in CLEAR_ARRAY_FIELDS: + array = getattr(owner, field) + setattr(owner, field, [x for x in array if x != owner_id]) + + owner.save(update_fields=CLEAR_ARRAY_FIELDS) diff --git a/services/cleanup/relations.py b/services/cleanup/relations.py new file mode 100644 index 000000000..aa58ad8ab --- /dev/null +++ b/services/cleanup/relations.py @@ -0,0 +1,132 @@ +import dataclasses +from collections import defaultdict +from graphlib import TopologicalSorter + +from django.db.models import Model, Q +from django.db.models.query import QuerySet +from shared.django_apps.codecov_auth.models import Owner, OwnerProfile +from shared.django_apps.core.models import Commit, Pull, Repository +from shared.django_apps.reports.models import DailyTestRollup, TestInstance + +# Relations referencing 0 through field 1 of model 2: +IGNORE_RELATIONS: set[tuple[type[Model], str, type[Model]]] = { + (Owner, "default_org", OwnerProfile), + (Owner, "bot", Owner), + (Owner, "bot", Repository), + (Owner, "author", Commit), + (Owner, "author", Pull), + (Repository, "forkid", Repository), +} + +# Relations which have no proper foreign key: +UNDOCUMENTED_RELATIONS: set[tuple[type[Model], str, type[Model]]] = { + (Repository, "repoid", TestInstance), + (Repository, "repoid", DailyTestRollup), +} + + +@dataclasses.dataclass +class Node: + edges: dict[type[Model], list[str]] = dataclasses.field( + default_factory=lambda: defaultdict(list) + ) + queryset: QuerySet = dataclasses.field(default_factory=Q) + depth: int = 9999 + + +def build_relation_graph(query: QuerySet) -> list[tuple[type[Model], QuerySet]]: + """ + This takes as input a django `QuerySet`, like `Repository.objects.filter(repoid=123)`. 
+ + It then walks the django relation graph, resolving all the models that have a relationship **to** the input model, + returning those models along with a `QuerySet` that allows either querying or deleting those models. + + The returned list is in topological sorting order, so related models are always sorted before models they depend on. + """ + nodes: dict[type[Model], Node] = defaultdict(Node) + graph: TopologicalSorter[type[Model]] = TopologicalSorter() + + def process_relation( + model: type[Model], related_model_field: str, related_model: type[Model] + ): + if (model, related_model_field, related_model) in IGNORE_RELATIONS: + return + + graph.add(model, related_model) + nodes[model].edges[related_model].append(related_model_field) + + if related_model not in nodes: + process_model(related_model) + + def process_model(model: type[Model]): + for ( + referenced_model, + related_model_field, + related_model, + ) in UNDOCUMENTED_RELATIONS: + if referenced_model == model: + process_relation(model, related_model_field, related_model) + + if not (meta := model._meta): + return + + for field in meta.get_fields(include_hidden=True): + if not field.is_relation: + continue + + if field.one_to_many or field.one_to_one: + # Most likely the reverse of a `ForeignKey` + # + + if not hasattr(field, "field"): + # I believe this is the actual *forward* definition of a `OneToOne` + continue + + # this should be the actual `ForeignKey` definition: + actual_field = field.field + if actual_field.model == model: + # this field goes from *this* model to another, but we are interested in the reverse actually + continue + + related_model = actual_field.model + related_model_field = actual_field.name + process_relation(model, related_model_field, related_model) + + elif field.many_to_many: + if not hasattr(field, "through"): + # we want to delete all related records on the join table + continue + + related_model = field.through + join_meta = related_model._meta + for field in 
join_meta.get_fields(include_hidden=True): + if not field.is_relation or field.model != model: + continue + + related_model_field = actual_field.name + process_relation(model, related_model_field, related_model) + + process_model(query.model) + + # the topological sort yields models in the order we want to run deletions + sorted_models = list(graph.static_order()) + + # but for actually building the querysets, we prefer the order from root to leafs + nodes[query.model].queryset = query + nodes[query.model].depth = 0 + for model in reversed(sorted_models): + node = nodes[model] + depth = node.depth + 1 + + for related_model, related_fields in node.edges.items(): + related_node = nodes[related_model] + + if depth < related_node.depth: + filter = Q() + for field in related_fields: + filter = filter | Q(**{f"{field}__in": node.queryset}) + + related_node.queryset = related_model.objects.filter(filter) + related_node.depth = depth + + return [(model, nodes[model].queryset) for model in sorted_models] diff --git a/services/cleanup/repository.py b/services/cleanup/repository.py new file mode 100644 index 000000000..1309a0c51 --- /dev/null +++ b/services/cleanup/repository.py @@ -0,0 +1,89 @@ +import logging +from uuid import uuid4 + +from django.db import transaction +from shared.django_apps.codecov_auth.models import Owner +from shared.django_apps.core.models import Repository + +from services.cleanup.cleanup import run_cleanup +from services.cleanup.utils import CleanupSummary + +log = logging.getLogger(__name__) + + +def cleanup_repo(repo_id: int) -> CleanupSummary: + cleanup_started, owner_id = start_repo_cleanup(repo_id) + + if cleanup_started: + log.info("Started Repository cleanup", extra={"repo_id": repo_id}) + else: + log.info("Continuing Repository cleanup", extra={"repo_id": repo_id}) + + repo_query = Repository.objects.filter(repoid=repo_id) + summary = run_cleanup(repo_query) + Owner.objects.filter(ownerid=owner_id).delete() + + log.info( + "Repository 
cleanup finished", extra={"repoid": repo_id, "summary": summary} + ) + return summary + + +def start_repo_cleanup(repo_id: int) -> tuple[bool, int]: + """ + Starts Repository deletion by marking the repository as `deleted`, and moving + it to a newly created "shadow Owner". + + This newly created `Owner` only has a valid `service` and `service_id`, + which are the only required non-NULL fields without defaults, and is otherwise + completely empty. + + The `ownerid` of this newly created owner is being returned along with a flag + indicating whether the repo cleanup was just started, or whether it is already + marked for deletion, and this function is being retried. + It is expected that repo cleanup is a slow process and might be done in more steps. + """ + # Runs in a transaction as we do not want to leave leftover shadow owners in + # case anything goes wrong here. + with transaction.atomic(): + ( + repo_deleted, + owner_id, + owner_name, + owner_username, + owner_service, + owner_service_id, + ) = Repository.objects.values_list( + "deleted", + "author__ownerid", + "author__name", + "author__username", + "author__service", + "author__service_id", + ).get(repoid=repo_id) + + if repo_deleted and not owner_name and not owner_username: + return (False, owner_id) + + # We mark the repository as "scheduled for deletion" by setting the `deleted` + # flag, moving it to a new shadow owner, and clearing some tokens. + shadow_owner = Owner.objects.create( + # `Owner` is unique across service/id, and both are non-NULL, + # so we cannot duplicate the values just like that, so lets change up the `service_id` + # a bit. We need the `Repository.service_id` for further `ArchiveService` deletions. 
+ service=owner_service, + service_id=f"☠️{owner_service_id}☠️", + ) + new_token = uuid4().hex + Repository.objects.filter(repoid=repo_id).update( + deleted=True, + author=shadow_owner, + upload_token=new_token, + image_token=new_token, + ) + + # The equivalent of `SET NULL`: + # TODO: maybe turn this into a `MANUAL_CLEANUP`? + Repository.objects.filter(fork=repo_id).update(fork=None) + + return (True, shadow_owner.ownerid) diff --git a/services/cleanup/tests/__init__.py b/services/cleanup/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/services/cleanup/tests/snapshots/relations__builds_delete_queries__owner.txt b/services/cleanup/tests/snapshots/relations__builds_delete_queries__owner.txt new file mode 100644 index 000000000..1cbb4f8c4 --- /dev/null +++ b/services/cleanup/tests/snapshots/relations__builds_delete_queries__owner.txt @@ -0,0 +1,606 @@ +-- YamlHistory +DELETE +FROM "yaml_history" +WHERE ("yaml_history"."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s) + OR "yaml_history"."author" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- CommitNotification +DELETE +FROM "commit_notifications" +WHERE "commit_notifications"."gh_app_id" IN + (SELECT V0."id" + FROM "codecov_auth_githubappinstallation" V0 + WHERE V0."owner_id" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- OwnerInstallationNameToUseForTask +DELETE +FROM "codecov_auth_ownerinstallationnametousefortask" +WHERE "codecov_auth_ownerinstallationnametousefortask"."owner_id" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s) + + +-- OrganizationLevelToken +DELETE +FROM "codecov_auth_organizationleveltoken" +WHERE "codecov_auth_organizationleveltoken"."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s) + + +-- OwnerProfile +DELETE +FROM "codecov_auth_ownerprofile" +WHERE "codecov_auth_ownerprofile"."owner_id" IN + (SELECT 
U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s) + + +-- Session +DELETE +FROM "sessions" +WHERE "sessions"."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s) + + +-- UserToken +DELETE +FROM "codecov_auth_usertoken" +WHERE "codecov_auth_usertoken"."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s) + + +-- TestInstance +DELETE +FROM "reports_testinstance" +WHERE "reports_testinstance"."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- DailyTestRollup +DELETE +FROM "reports_dailytestrollups" +WHERE "reports_dailytestrollups"."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- RepositoryToken +DELETE +FROM "codecov_auth_repositorytoken" +WHERE "codecov_auth_repositorytoken"."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- Branch +DELETE +FROM "branches" +WHERE "branches"."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- FlagComparison +DELETE +FROM "compare_flagcomparison" +WHERE "compare_flagcomparison"."repositoryflag_id" IN + (SELECT W0."id" + FROM "reports_repositoryflag" W0 + WHERE W0."repository_id" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + + +-- ComponentComparison +DELETE +FROM "compare_componentcomparison" +WHERE "compare_componentcomparison"."commit_comparison_id" IN + (SELECT X0."id" + FROM "compare_commitcomparison" X0 + WHERE (X0."base_commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT 
U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + OR X0."compare_commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))))) + + +-- CommitError +DELETE +FROM "core_commiterror" +WHERE "core_commiterror"."commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + + +-- LabelAnalysisProcessingError +DELETE +FROM "labelanalysis_labelanalysisprocessingerror" +WHERE "labelanalysis_labelanalysisprocessingerror"."label_analysis_request_id" IN + (SELECT X0."id" + FROM "labelanalysis_labelanalysisrequest" X0 + WHERE (X0."base_commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + OR X0."head_commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))))) + + +-- ReportResults +DELETE +FROM "reports_reportresults" +WHERE "reports_reportresults"."report_id" IN + (SELECT X0."id" + FROM "reports_commitreport" X0 + WHERE X0."commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)))) + + +-- ReportDetails +DELETE +FROM "reports_reportdetails" +WHERE "reports_reportdetails"."report_id" IN + (SELECT X0."id" + FROM "reports_commitreport" X0 + WHERE X0."commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM 
"owners" U0 + WHERE U0."ownerid" = %s)))) + + +-- ReportLevelTotals +DELETE +FROM "reports_reportleveltotals" +WHERE "reports_reportleveltotals"."report_id" IN + (SELECT X0."id" + FROM "reports_commitreport" X0 + WHERE X0."commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)))) + + +-- UploadError +DELETE +FROM "reports_uploaderror" +WHERE "reports_uploaderror"."upload_id" IN + (SELECT Y0."id" + FROM "reports_upload" Y0 + WHERE Y0."report_id" IN + (SELECT X0."id" + FROM "reports_commitreport" X0 + WHERE X0."commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))))) + + +-- UploadFlagMembership +DELETE +FROM "reports_uploadflagmembership" +WHERE "reports_uploadflagmembership"."flag_id" IN + (SELECT W0."id" + FROM "reports_repositoryflag" W0 + WHERE W0."repository_id" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + + +-- UploadLevelTotals +DELETE +FROM "reports_uploadleveltotals" +WHERE "reports_uploadleveltotals"."upload_id" IN + (SELECT Y0."id" + FROM "reports_upload" Y0 + WHERE Y0."report_id" IN + (SELECT X0."id" + FROM "reports_commitreport" X0 + WHERE X0."commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))))) + + +-- TestResultReportTotals +DELETE +FROM "reports_testresultreporttotals" +WHERE "reports_testresultreporttotals"."report_id" IN + (SELECT X0."id" + FROM "reports_commitreport" X0 + WHERE X0."commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + 
WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)))) + + +-- StaticAnalysisSuiteFilepath +DELETE +FROM "staticanalysis_staticanalysissuitefilepath" +WHERE "staticanalysis_staticanalysissuitefilepath"."file_snapshot_id" IN + (SELECT W0."id" + FROM "staticanalysis_staticanalysissinglefilesnapshot" W0 + WHERE W0."repository_id" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + + +-- Pull +DELETE +FROM "pulls" +WHERE "pulls"."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- ProfilingUpload +DELETE +FROM "profiling_profilingupload" +WHERE "profiling_profilingupload"."profiling_commit_id" IN + (SELECT W0."id" + FROM "profiling_profilingcommit" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + + +-- TestFlagBridge +DELETE +FROM "reports_test_results_flag_bridge" +WHERE "reports_test_results_flag_bridge"."test_id" IN + (SELECT W0."id" + FROM "reports_test" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + + +-- Flake +DELETE +FROM "reports_flake" +WHERE "reports_flake"."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- LastCacheRollupDate +DELETE +FROM "reports_lastrollupdate" +WHERE "reports_lastrollupdate"."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- GithubAppInstallation +DELETE +FROM "codecov_auth_githubappinstallation" +WHERE "codecov_auth_githubappinstallation"."owner_id" IN + (SELECT U0."ownerid" + FROM 
"owners" U0 + WHERE U0."ownerid" = %s) + + +-- CommitComparison +DELETE +FROM "compare_commitcomparison" +WHERE ("compare_commitcomparison"."base_commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + OR "compare_commitcomparison"."compare_commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)))) + + +-- LabelAnalysisRequest +DELETE +FROM "labelanalysis_labelanalysisrequest" +WHERE ("labelanalysis_labelanalysisrequest"."base_commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + OR "labelanalysis_labelanalysisrequest"."head_commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)))) + + +-- ReportSession +DELETE +FROM "reports_upload" +WHERE "reports_upload"."report_id" IN + (SELECT X0."id" + FROM "reports_commitreport" X0 + WHERE X0."commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)))) + + +-- StaticAnalysisSuite +DELETE +FROM "staticanalysis_staticanalysissuite" +WHERE "staticanalysis_staticanalysissuite"."commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + + +-- StaticAnalysisSingleFileSnapshot +DELETE +FROM 
"staticanalysis_staticanalysissinglefilesnapshot" +WHERE "staticanalysis_staticanalysissinglefilesnapshot"."repository_id" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- ProfilingCommit +DELETE +FROM "profiling_profilingcommit" +WHERE "profiling_profilingcommit"."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- RepositoryFlag +DELETE +FROM "reports_repositoryflag" +WHERE "reports_repositoryflag"."repository_id" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- Test +DELETE +FROM "reports_test" +WHERE "reports_test"."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- ReducedError +DELETE +FROM "reports_reducederror" +WHERE "reports_reducederror"."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- CommitReport +DELETE +FROM "reports_commitreport" +WHERE "reports_commitreport"."commit_id" IN + (SELECT W0."id" + FROM "commits" W0 + WHERE W0."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s))) + + +-- Commit +DELETE +FROM "commits" +WHERE "commits"."repoid" IN + (SELECT V0."repoid" + FROM "repos" V0 + WHERE V0."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s)) + + +-- Repository +DELETE +FROM "repos" +WHERE "repos"."ownerid" IN + (SELECT U0."ownerid" + FROM "owners" U0 + WHERE U0."ownerid" = %s) + + +-- Owner +DELETE +FROM "owners" +WHERE "owners"."ownerid" = %s diff --git a/services/cleanup/tests/snapshots/relations__builds_delete_queries__repository.txt 
b/services/cleanup/tests/snapshots/relations__builds_delete_queries__repository.txt new file mode 100644 index 000000000..bac493729 --- /dev/null +++ b/services/cleanup/tests/snapshots/relations__builds_delete_queries__repository.txt @@ -0,0 +1,422 @@ +-- TestInstance +DELETE +FROM "reports_testinstance" +WHERE "reports_testinstance"."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- DailyTestRollup +DELETE +FROM "reports_dailytestrollups" +WHERE "reports_dailytestrollups"."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- RepositoryToken +DELETE +FROM "codecov_auth_repositorytoken" +WHERE "codecov_auth_repositorytoken"."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- Branch +DELETE +FROM "branches" +WHERE "branches"."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- FlagComparison +DELETE +FROM "compare_flagcomparison" +WHERE "compare_flagcomparison"."repositoryflag_id" IN + (SELECT V0."id" + FROM "reports_repositoryflag" V0 + WHERE V0."repository_id" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + + +-- ComponentComparison +DELETE +FROM "compare_componentcomparison" +WHERE "compare_componentcomparison"."commit_comparison_id" IN + (SELECT W0."id" + FROM "compare_commitcomparison" W0 + WHERE (W0."base_commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + OR W0."compare_commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)))) + + +-- CommitNotification +DELETE +FROM "commit_notifications" +WHERE "commit_notifications"."commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + + +-- CommitError +DELETE +FROM "core_commiterror" +WHERE 
"core_commiterror"."commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + + +-- LabelAnalysisProcessingError +DELETE +FROM "labelanalysis_labelanalysisprocessingerror" +WHERE "labelanalysis_labelanalysisprocessingerror"."label_analysis_request_id" IN + (SELECT W0."id" + FROM "labelanalysis_labelanalysisrequest" W0 + WHERE (W0."base_commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + OR W0."head_commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)))) + + +-- ReportResults +DELETE +FROM "reports_reportresults" +WHERE "reports_reportresults"."report_id" IN + (SELECT W0."id" + FROM "reports_commitreport" W0 + WHERE W0."commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s))) + + +-- ReportDetails +DELETE +FROM "reports_reportdetails" +WHERE "reports_reportdetails"."report_id" IN + (SELECT W0."id" + FROM "reports_commitreport" W0 + WHERE W0."commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s))) + + +-- ReportLevelTotals +DELETE +FROM "reports_reportleveltotals" +WHERE "reports_reportleveltotals"."report_id" IN + (SELECT W0."id" + FROM "reports_commitreport" W0 + WHERE W0."commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s))) + + +-- UploadError +DELETE +FROM "reports_uploaderror" +WHERE "reports_uploaderror"."upload_id" IN + (SELECT X0."id" + FROM "reports_upload" X0 + WHERE X0."report_id" IN + (SELECT W0."id" + FROM "reports_commitreport" W0 + WHERE W0."commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT 
U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)))) + + +-- UploadFlagMembership +DELETE +FROM "reports_uploadflagmembership" +WHERE "reports_uploadflagmembership"."flag_id" IN + (SELECT V0."id" + FROM "reports_repositoryflag" V0 + WHERE V0."repository_id" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + + +-- UploadLevelTotals +DELETE +FROM "reports_uploadleveltotals" +WHERE "reports_uploadleveltotals"."upload_id" IN + (SELECT X0."id" + FROM "reports_upload" X0 + WHERE X0."report_id" IN + (SELECT W0."id" + FROM "reports_commitreport" W0 + WHERE W0."commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)))) + + +-- TestResultReportTotals +DELETE +FROM "reports_testresultreporttotals" +WHERE "reports_testresultreporttotals"."report_id" IN + (SELECT W0."id" + FROM "reports_commitreport" W0 + WHERE W0."commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s))) + + +-- StaticAnalysisSuiteFilepath +DELETE +FROM "staticanalysis_staticanalysissuitefilepath" +WHERE "staticanalysis_staticanalysissuitefilepath"."file_snapshot_id" IN + (SELECT V0."id" + FROM "staticanalysis_staticanalysissinglefilesnapshot" V0 + WHERE V0."repository_id" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + + +-- Pull +DELETE +FROM "pulls" +WHERE "pulls"."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- ProfilingUpload +DELETE +FROM "profiling_profilingupload" +WHERE "profiling_profilingupload"."profiling_commit_id" IN + (SELECT V0."id" + FROM "profiling_profilingcommit" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + + +-- TestFlagBridge +DELETE +FROM "reports_test_results_flag_bridge" +WHERE "reports_test_results_flag_bridge"."test_id" IN + (SELECT V0."id" + FROM "reports_test" V0 + WHERE 
V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + + +-- Flake +DELETE +FROM "reports_flake" +WHERE "reports_flake"."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- LastCacheRollupDate +DELETE +FROM "reports_lastrollupdate" +WHERE "reports_lastrollupdate"."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- CommitComparison +DELETE +FROM "compare_commitcomparison" +WHERE ("compare_commitcomparison"."base_commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + OR "compare_commitcomparison"."compare_commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s))) + + +-- LabelAnalysisRequest +DELETE +FROM "labelanalysis_labelanalysisrequest" +WHERE ("labelanalysis_labelanalysisrequest"."base_commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + OR "labelanalysis_labelanalysisrequest"."head_commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s))) + + +-- ReportSession +DELETE +FROM "reports_upload" +WHERE "reports_upload"."report_id" IN + (SELECT W0."id" + FROM "reports_commitreport" W0 + WHERE W0."commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s))) + + +-- StaticAnalysisSuite +DELETE +FROM "staticanalysis_staticanalysissuite" +WHERE "staticanalysis_staticanalysissuite"."commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + + +-- StaticAnalysisSingleFileSnapshot +DELETE +FROM "staticanalysis_staticanalysissinglefilesnapshot" +WHERE 
"staticanalysis_staticanalysissinglefilesnapshot"."repository_id" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- ProfilingCommit +DELETE +FROM "profiling_profilingcommit" +WHERE "profiling_profilingcommit"."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- RepositoryFlag +DELETE +FROM "reports_repositoryflag" +WHERE "reports_repositoryflag"."repository_id" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- Test +DELETE +FROM "reports_test" +WHERE "reports_test"."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- ReducedError +DELETE +FROM "reports_reducederror" +WHERE "reports_reducederror"."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- CommitReport +DELETE +FROM "reports_commitreport" +WHERE "reports_commitreport"."commit_id" IN + (SELECT V0."id" + FROM "commits" V0 + WHERE V0."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s)) + + +-- Commit +DELETE +FROM "commits" +WHERE "commits"."repoid" IN + (SELECT U0."repoid" + FROM "repos" U0 + WHERE U0."repoid" = %s) + + +-- Repository +DELETE +FROM "repos" +WHERE "repos"."repoid" = %s diff --git a/services/cleanup/tests/test_relations.py b/services/cleanup/tests/test_relations.py new file mode 100644 index 000000000..2594256a0 --- /dev/null +++ b/services/cleanup/tests/test_relations.py @@ -0,0 +1,35 @@ +import pytest +import sqlparse +from django.db.models.query import QuerySet +from django.db.models.sql.subqueries import DeleteQuery +from shared.django_apps.codecov_auth.models import Owner +from shared.django_apps.core.models import Repository + +from services.cleanup.relations import build_relation_graph + + +def dump_delete_queries(queryset: QuerySet) -> str: + relations = build_relation_graph(queryset) + + queries = "" + for model, query in relations: + compiler = query.query.chain(DeleteQuery).get_compiler(query.db) + sql, 
_params = compiler.as_sql() + sql = sqlparse.format(sql, reindent=True, keyword_case="upper") + + if queries: + queries += "\n\n" + queries += f"-- {model.__name__}\n{sql}\n" + + return queries + + +@pytest.mark.django_db +def test_builds_delete_queries(snapshot): + repo = Repository.objects.filter(repoid=123) + org = Owner.objects.filter(ownerid=123) + + # if you change any of the model relations, this snapshot will most likely change. + # in that case, feel free to update this using `pytest --insta update`. + assert dump_delete_queries(repo) == snapshot("repository.txt") + assert dump_delete_queries(org) == snapshot("owner.txt") diff --git a/services/cleanup/utils.py b/services/cleanup/utils.py new file mode 100644 index 000000000..fc212ddc0 --- /dev/null +++ b/services/cleanup/utils.py @@ -0,0 +1,33 @@ +import dataclasses + +import shared.storage +from django.db.models import Model +from shared.api_archive.storage import StorageService +from shared.config import get_config + + +class CleanupContext: + storage: StorageService + default_bucket: str + bundleanalysis_bucket: str + + def __init__(self): + self.storage = shared.storage.get_appropriate_storage_service() + self.default_bucket = get_config( + "services", "minio", "bucket", default="archive" + ) + self.bundleanalysis_bucket = get_config( + "bundle_analysis", "bucket_name", default="bundle-analysis" + ) + + +@dataclasses.dataclass +class CleanupResult: + cleaned_models: int + cleaned_files: int = 0 + + +@dataclasses.dataclass +class CleanupSummary: + totals: CleanupResult + summary: dict[type[Model], CleanupResult] diff --git a/tasks/delete_owner.py b/tasks/delete_owner.py index fa7073164..4a7af4a32 100644 --- a/tasks/delete_owner.py +++ b/tasks/delete_owner.py @@ -1,118 +1,21 @@ import logging -from celery.exceptions import SoftTimeLimitExceeded from shared.celery_config import delete_owner_task_name from app import celery_app -from database.models import Branch, Commit, LoginSession, Owner, Pull, 
Repository -from database.models.core import CompareCommit -from services.archive import ArchiveService +from services.cleanup.owner import cleanup_owner +from services.cleanup.utils import CleanupSummary from tasks.base import BaseCodecovTask log = logging.getLogger(__name__) class DeleteOwnerTask(BaseCodecovTask, name=delete_owner_task_name): - """ - Delete an owner and their data: - - Repo archive data for each of their owned repos - - Owner entry from db - - Cascading deletes of repos, pulls, and branches for the owner - """ + acks_late = True # retry the task when the worker dies for whatever reason + max_retries = None # aka, no limit on retries - def run_impl(self, db_session, ownerid): - log.info("Delete owner", extra=dict(ownerid=ownerid)) - owner = db_session.query(Owner).filter(Owner.ownerid == ownerid).first() - - assert owner, "Owner not found" - try: - self.delete_repo_archives(db_session, ownerid) - self.delete_owner_from_orgs(db_session, ownerid) - self.delete_from_database(db_session, owner) - except SoftTimeLimitExceeded: - self.retry(max_retries=3) - - def delete_from_database(self, db_session, owner): - # finally delete the actual owner entry and depending data from other tables - ownerid = owner.ownerid - involved_repos = db_session.query(Repository.repoid).filter( - Repository.ownerid == ownerid - ) - involved_commits = db_session.query(Commit.id_).filter( - Commit.repoid.in_(involved_repos) - ) - log.info("Deleting branches from DB", extra=dict(ownerid=ownerid)) - db_session.query(Branch).filter(Branch.repoid.in_(involved_repos)).delete( - synchronize_session=False - ) - db_session.commit() - - log.info("Deleting commit compare from DB", extra=dict(ownerid=ownerid)) - db_session.query(CompareCommit).filter( - CompareCommit.base_commit_id.in_(involved_commits) - ).delete(synchronize_session=False) - db_session.query(CompareCommit).filter( - CompareCommit.compare_commit_id.in_(involved_commits) - ).delete(synchronize_session=False) - 
db_session.commit() - - log.info("Deleting pulls from DB", extra=dict(ownerid=ownerid)) - db_session.query(Pull).filter(Pull.repoid.in_(involved_repos)).delete( - synchronize_session=False - ) - db_session.commit() - log.info("Deleting commits from DB", extra=dict(ownerid=ownerid)) - db_session.query(Commit).filter(Commit.repoid.in_(involved_repos)).delete( - synchronize_session=False - ) - db_session.commit() - log.info("Deleting repos from DB", extra=dict(ownerid=ownerid)) - involved_repos.delete() - db_session.commit() - log.info("Setting other owner bots to NULL", extra=dict(ownerid=ownerid)) - db_session.query(Owner).filter(Owner.bot_id == ownerid).update( - {Owner.bot_id: None}, synchronize_session=False - ) - db_session.commit() - log.info( - "Cleaning repos that have this owner as bot", extra=dict(ownerid=ownerid) - ) - db_session.query(Repository.repoid).filter(Repository.bot_id == ownerid).update( - {Repository.bot_id: None}, synchronize_session=False - ) - db_session.commit() - log.info("Deleting sessions from user", extra=dict(ownerid=ownerid)) - db_session.query(LoginSession).filter(LoginSession.ownerid == ownerid).delete() - db_session.commit() - log.info("Deleting owner from DB", extra=dict(ownerid=ownerid)) - db_session.delete(owner) - - def delete_repo_archives(self, db_session, ownerid): - """ - Delete all of the data stored in archives for owned repos - """ - log.info("Deleting chunk files", extra=dict(ownerid=ownerid)) - repos_for_owner = ( - db_session.query(Repository).filter(Repository.ownerid == ownerid).all() - ) - - for repo in repos_for_owner: - archive_service = ArchiveService(repo) - archive_service.delete_repo_files() - - def delete_owner_from_orgs(self, db_session, ownerid): - """ - Remove this owner wherever they exist in the organizations column of the owners table - """ - log.info( - "Removing ownerid from 'organizations' arrays", extra=dict(ownerid=ownerid) - ) - owners_in_org = ( - 
db_session.query(Owner).filter(Owner.organizations.any(ownerid)).all() - ) - - for owner in owners_in_org: - owner.organizations.remove(ownerid) + def run_impl(self, _db_session, ownerid: int) -> CleanupSummary: + return cleanup_owner(ownerid) RegisteredDeleteOwnerTask = celery_app.register_task(DeleteOwnerTask()) diff --git a/tasks/flush_repo.py b/tasks/flush_repo.py index 7f901ee25..d1253cf5d 100644 --- a/tasks/flush_repo.py +++ b/tasks/flush_repo.py @@ -1,218 +1,19 @@ import logging -from dataclasses import dataclass -from typing import Optional - -import sentry_sdk from app import celery_app -from database.engine import Session -from database.models import ( - Branch, - Commit, - CommitError, - CommitNotification, - CommitReport, - CompareCommit, - CompareFlag, - LabelAnalysisRequest, - Pull, - ReportDetails, - ReportLevelTotals, - ReportResults, - Repository, - RepositoryFlag, - StaticAnalysisSingleFileSnapshot, - StaticAnalysisSuite, - StaticAnalysisSuiteFilepath, - Upload, - UploadError, - UploadLevelTotals, - uploadflagmembership, -) -from services.archive import ArchiveService +from services.cleanup.repository import cleanup_repo +from services.cleanup.utils import CleanupSummary from tasks.base import BaseCodecovTask log = logging.getLogger(__name__) -@dataclass -class FlushRepoTaskReturnType(object): - error: Optional[str] = None - deleted_commits_count: int = 0 - delete_branches_count: int = 0 - deleted_pulls_count: int = 0 - deleted_archives_count: int = 0 - - class FlushRepoTask(BaseCodecovTask, name="app.tasks.flush_repo.FlushRepo"): - @sentry_sdk.trace - def _delete_archive(self, repo: Repository) -> int: - archive_service = ArchiveService(repo) - deleted_archives = archive_service.delete_repo_files() - log.info( - "Deleted archives from storage", - extra=dict(repoid=repo.repoid, deleted_archives_count=deleted_archives), - ) - return deleted_archives - - @sentry_sdk.trace - def _delete_comparisons(self, db_session: Session, commit_ids, repoid: int) 
-> None: - commit_comparison_ids = db_session.query(CompareCommit.id_).filter( - CompareCommit.base_commit_id.in_(commit_ids) - | CompareCommit.compare_commit_id.in_(commit_ids) - ) - db_session.query(CompareFlag).filter( - CompareFlag.commit_comparison_id.in_(commit_comparison_ids) - ).delete(synchronize_session=False) - db_session.commit() - db_session.query(CompareCommit).filter( - CompareCommit.base_commit_id.in_(commit_ids) - | CompareCommit.compare_commit_id.in_(commit_ids) - ).delete(synchronize_session=False) - db_session.commit() - log.info("Deleted comparisons", extra=dict(repoid=repoid)) - - @sentry_sdk.trace - def _delete_reports(self, db_session: Session, report_ids, repoid: int): - db_session.query(ReportDetails).filter( - ReportDetails.report_id.in_(report_ids) - ).delete(synchronize_session=False) - db_session.query(ReportLevelTotals).filter( - ReportLevelTotals.report_id.in_(report_ids) - ).delete(synchronize_session=False) - db_session.query(ReportResults).filter( - ReportResults.report_id.in_(report_ids) - ).delete(synchronize_session=False) - db_session.commit() - log.info("Deleted reports", extra=dict(repoid=repoid)) - - @sentry_sdk.trace - def _delete_uploads(self, db_session: Session, report_ids, repoid: int): - # uploads - upload_ids = db_session.query(Upload.id_).filter( - Upload.report_id.in_(report_ids) - ) - db_session.query(UploadError).filter( - UploadError.upload_id.in_(upload_ids) - ).delete(synchronize_session=False) - db_session.query(UploadLevelTotals).filter( - UploadLevelTotals.upload_id.in_(upload_ids) - ).delete(synchronize_session=False) - db_session.query(uploadflagmembership).filter( - uploadflagmembership.c.upload_id.in_(upload_ids) - ).delete(synchronize_session=False) - db_session.query(Upload).filter(Upload.report_id.in_(report_ids)).delete( - synchronize_session=False - ) - db_session.commit() - log.info("Deleted uploads", extra=dict(repoid=repoid)) - - @sentry_sdk.trace - def _delete_commit_details(self, db_session: 
Session, commit_ids, repoid: int): - db_session.query(CommitReport).filter( - CommitReport.commit_id.in_(commit_ids) - ).delete(synchronize_session=False) - db_session.query(RepositoryFlag).filter_by(repository_id=repoid).delete() - db_session.commit() - db_session.query(CommitError).filter( - CommitError.commit_id.in_(commit_ids) - ).delete(synchronize_session=False) - db_session.query(CommitNotification).filter( - CommitNotification.commit_id.in_(commit_ids) - ).delete(synchronize_session=False) - db_session.commit() - log.info("Deleted commit details", extra=dict(repoid=repoid)) - - @sentry_sdk.trace - def _delete_static_analysis(self, db_session: Session, commit_ids, repoid: int): - db_session.query(StaticAnalysisSuite).filter( - StaticAnalysisSuite.commit_id.in_(commit_ids) - ).delete(synchronize_session=False) - snapshot_ids = db_session.query(StaticAnalysisSingleFileSnapshot.id_).filter_by( - repository_id=repoid - ) - db_session.query(StaticAnalysisSuiteFilepath).filter( - StaticAnalysisSuiteFilepath.file_snapshot_id.in_(snapshot_ids) - ).delete(synchronize_session=False) - db_session.query(StaticAnalysisSingleFileSnapshot).filter_by( - repository_id=repoid - ).delete() - db_session.commit() - log.info("Deleted static analysis info", extra=dict(repoid=repoid)) - - @sentry_sdk.trace - def _delete_label_analysis(self, db_session: Session, commit_ids, repoid: int): - db_session.query(LabelAnalysisRequest).filter( - LabelAnalysisRequest.base_commit_id.in_(commit_ids) - | LabelAnalysisRequest.head_commit_id.in_(commit_ids) - ).delete(synchronize_session=False) - db_session.commit() - log.info("Deleted label analysis info", extra=dict(repoid=repoid)) - - @sentry_sdk.trace - def _delete_commits(self, db_session: Session, repoid: int) -> int: - delete_count = ( - db_session.query(Commit) - .filter_by(repoid=repoid) - .delete(synchronize_session=False) - ) - db_session.commit() - - log.info( - "Deleted commits", extra=dict(repoid=repoid, deleted_count=delete_count) 
- ) - return delete_count - - @sentry_sdk.trace - def _delete_branches(self, db_session: Session, repoid: int) -> int: - deleted_branches = db_session.query(Branch).filter_by(repoid=repoid).delete() - db_session.commit() - log.info("Deleted branches", extra=dict(repoid=repoid)) - return deleted_branches - - @sentry_sdk.trace - def _delete_pulls(self, db_session: Session, repoid: int) -> int: - deleted_pulls = db_session.query(Pull).filter_by(repoid=repoid).delete() - db_session.commit() - log.info("Deleted pulls", extra=dict(repoid=repoid)) - return deleted_pulls - - @sentry_sdk.trace - def run_impl( - self, db_session: Session, *, repoid: int, **kwargs - ) -> FlushRepoTaskReturnType: - log.info("Deleting repo contents", extra=dict(repoid=repoid)) - repo = db_session.query(Repository).filter_by(repoid=repoid).first() - if repo is None: - log.exception("Repo not found", extra=dict(repoid=repoid)) - return FlushRepoTaskReturnType(error="repo not found") - - deleted_archives = self._delete_archive(repo) - commit_ids = db_session.query(Commit.id_).filter_by(repoid=repo.repoid) - self._delete_comparisons(db_session, commit_ids, repoid) - - report_ids = db_session.query(CommitReport.id_).filter( - CommitReport.commit_id.in_(commit_ids) - ) - self._delete_reports(db_session, report_ids, repoid) - self._delete_uploads(db_session, report_ids, repoid) - - self._delete_commit_details(db_session, commit_ids, repoid) - - # TODO: Component comparison - - self._delete_static_analysis(db_session, commit_ids, repoid) + acks_late = True # retry the task when the worker dies for whatever reason + max_retries = None # aka, no limit on retries - deleted_commits = self._delete_commits(db_session, repoid) - deleted_branches = self._delete_branches(db_session, repoid) - deleted_pulls = self._delete_pulls(db_session, repoid) - repo.yaml = None - return FlushRepoTaskReturnType( - deleted_archives_count=deleted_archives, - deleted_commits_count=deleted_commits, - 
delete_branches_count=deleted_branches, - deleted_pulls_count=deleted_pulls, - ) + def run_impl(self, _db_session, repoid: int) -> CleanupSummary: + return cleanup_repo(repoid) FlushRepo = celery_app.register_task(FlushRepoTask()) diff --git a/tasks/tests/unit/test_delete_owner.py b/tasks/tests/unit/test_delete_owner.py index 067c41ef2..7f8059352 100644 --- a/tasks/tests/unit/test_delete_owner.py +++ b/tasks/tests/unit/test_delete_owner.py @@ -1,212 +1,94 @@ from pathlib import Path import pytest -from celery.exceptions import Retry, SoftTimeLimitExceeded - -from database.models import Branch, Commit, CompareCommit, Owner, Pull, Repository -from database.tests.factories import ( - BranchFactory, +from shared.django_apps.codecov_auth.models import Owner +from shared.django_apps.codecov_auth.tests.factories import OwnerFactory +from shared.django_apps.compare.models import CommitComparison +from shared.django_apps.compare.tests.factories import CommitComparisonFactory +from shared.django_apps.core.models import Branch, Commit, Pull, Repository +from shared.django_apps.core.tests.factories import ( CommitFactory, - CompareCommitFactory, - OwnerFactory, - PullFactory, RepositoryFactory, ) -from services.archive import ArchiveService + +from services.cleanup.utils import CleanupResult, CleanupSummary from tasks.delete_owner import DeleteOwnerTask here = Path(__file__) -class TestDeleteOwnerTaskUnit(object): - def test_unknown_owner(self, mocker, mock_configuration, dbsession): - unknown_ownerid = 10404 - with pytest.raises(AssertionError, match="Owner not found"): - DeleteOwnerTask().run_impl(dbsession, unknown_ownerid) - - def test_delete_owner_deletes_owner_with_ownerid( - self, mocker, mock_configuration, mock_storage, dbsession - ): - ownerid = 10777 - serviceid = "12345" - repoid = 1337 - - user = OwnerFactory.create(ownerid=ownerid, service_id=serviceid) - dbsession.add(user) - - repo = RepositoryFactory.create( - repoid=repoid, name="dracula", service_id="7331", 
owner=user - ) - dbsession.add(repo) - - commit = CommitFactory.create( - message="", - commitid="abf6d4df662c47e32460020ab14abf9303581429", - repository__owner=user, - ) - dbsession.add(commit) - - branch = BranchFactory.create(repository=repo) - dbsession.add(branch) - - pull = PullFactory.create(repository=repo) - dbsession.add(pull) - - dbsession.flush() - - DeleteOwnerTask().run_impl(dbsession, ownerid) - - owner = dbsession.query(Owner).filter(Owner.ownerid == ownerid).first() - - repos = dbsession.query(Repository).filter(Repository.ownerid == ownerid).all() - - commits = dbsession.query(Commit).filter(Commit.repoid == repoid).all() - - branches = dbsession.query(Branch).filter(Branch.repoid == repoid).all() - - pulls = dbsession.query(Pull).filter(Pull.repoid == repoid).all() - - assert owner is None - assert repos == [] - assert commits == [] - assert branches == [] - assert pulls == [] - - def test_delete_owner_deletes_owner_with_commit_compares( - self, mocker, mock_configuration, mock_storage, dbsession - ): - ownerid = 10777 - serviceid = "12345" - repoid = 1337 - - user = OwnerFactory.create(ownerid=ownerid, service_id=serviceid) - dbsession.add(user) - - repo = RepositoryFactory.create( - repoid=repoid, name="dracula", service_id="7331", owner=user - ) - dbsession.add(repo) - - base_commit_id = 1234 - base_commit = CommitFactory.create( - message="", - commitid="abf6d4df662c47e32460020ab14abf9303581429", - repository__owner=user, - ) - dbsession.add(base_commit) - - compare_commit_id = 1235 - compare_commit = CommitFactory.create( - message="", - commitid="abf6d4df662c47e32460020ab14abf9303581421", - repository__owner=user, - ) - dbsession.add(compare_commit) - - comparison = CompareCommitFactory.create( - base_commit=base_commit, compare_commit=compare_commit - ) - dbsession.add(comparison) - - branch = BranchFactory.create(repository=repo) - dbsession.add(branch) - - pull = PullFactory.create(repository=repo) - dbsession.add(pull) - - 
dbsession.flush() - - DeleteOwnerTask().run_impl(dbsession, ownerid) - - owner = dbsession.query(Owner).filter(Owner.ownerid == ownerid).first() - - repos = dbsession.query(Repository).filter(Repository.ownerid == ownerid).all() - - commits = dbsession.query(Commit).filter(Commit.repoid == repoid).all() - - branches = dbsession.query(Branch).filter(Branch.repoid == repoid).all() - - pulls = dbsession.query(Pull).filter(Pull.repoid == repoid).all() - - comparisons = ( - dbsession.query(CompareCommit) - .filter( - CompareCommit.base_commit_id == base_commit_id - or CompareCommit.compare_commit_id == compare_commit_id - ) - .all() - ) - - assert owner is None - assert repos == [] - assert commits == [] - assert branches == [] - assert pulls == [] - assert comparisons == [] - - def test_delete_owner_from_orgs_removes_ownerid_from_organizations_of_related_owners( - self, mocker, mock_configuration, mock_storage, dbsession - ): - org = OwnerFactory.create(service_id="9000") - dbsession.add(org) - dbsession.flush() - org_ownerid = org.ownerid - - user_1 = OwnerFactory.create( - ownerid=1001, service_id="9001", organizations=[org_ownerid] - ) - dbsession.add(user_1) - - user_2 = OwnerFactory.create( - ownerid=1002, service_id="9002", organizations=[org_ownerid, user_1.ownerid] - ) - dbsession.add(user_2) - - dbsession.flush() - - DeleteOwnerTask().delete_owner_from_orgs(dbsession, org_ownerid) - - assert user_1.organizations == [] - assert user_2.organizations == [user_1.ownerid] - - def test_delete_owner_deletes_repo_archives_for_each_repo( - self, mocker, mock_configuration, mock_storage, dbsession - ): - ownerid = 10777 - serviceid = "12345" - - user = OwnerFactory.create(ownerid=ownerid, service_id=serviceid) - dbsession.add(user) - - repo_1 = RepositoryFactory.create( - repoid=1337, name="dracula", service_id="7331", owner=user - ) - dbsession.add(repo_1) - - repo_2 = RepositoryFactory.create( - repoid=1338, name="frankenstein", service_id="7332", owner=user - ) - 
dbsession.add(repo_2) - - dbsession.flush() - - mocked_delete_repo_files = mocker.patch.object( - ArchiveService, "delete_repo_files" - ) - - DeleteOwnerTask().delete_repo_archives(dbsession, ownerid) - - assert mocked_delete_repo_files.call_count == 2 - - def test_delete_owner_timeout( - self, mocker, mock_configuration, mock_storage, dbsession - ): - org = OwnerFactory.create(service_id="9000") - dbsession.add(org) - - dbsession.flush() - mocker.patch.object( - DeleteOwnerTask, "delete_repo_archives", side_effect=SoftTimeLimitExceeded() - ) - with pytest.raises(Retry): - DeleteOwnerTask().run_impl(dbsession, org.ownerid) +@pytest.mark.django_db(transaction=True) +def test_delete_owner_deletes_owner_with_ownerid(mock_storage): + user = OwnerFactory() + repo = RepositoryFactory(author=user) + CommitFactory(repository=repo, author=user) + # NOTE: the commit creates an implicit `Branch` and `Pull` + + res = DeleteOwnerTask().run_impl({}, user.ownerid) + + assert res == CleanupSummary( + CleanupResult(5), + { + Branch: CleanupResult(1), + Commit: CleanupResult(1), + Owner: CleanupResult(1), + Pull: CleanupResult(1), + Repository: CleanupResult(1), + }, + ) + + assert Branch.objects.count() == 0 + assert Commit.objects.count() == 0 + assert Owner.objects.count() == 0 + assert Pull.objects.count() == 0 + assert Repository.objects.count() == 0 + + +@pytest.mark.django_db(transaction=True) +def test_delete_owner_deletes_owner_with_commit_compares(mock_storage): + user = OwnerFactory() + repo = RepositoryFactory(author=user) + + base_commit = CommitFactory(repository=repo, author=user) + compare_commit = CommitFactory(repository=repo, author=user) + CommitComparisonFactory(base_commit=base_commit, compare_commit=compare_commit) + + res = DeleteOwnerTask().run_impl({}, user.ownerid) + + assert res == CleanupSummary( + CleanupResult(7), + { + Branch: CleanupResult(1), + Commit: CleanupResult(2), + CommitComparison: CleanupResult(1), + Owner: CleanupResult(1), + Pull: 
CleanupResult(1), + Repository: CleanupResult(1), + }, + ) + + assert Branch.objects.count() == 0 + assert Commit.objects.count() == 0 + assert CommitComparison.objects.count() == 0 + assert Owner.objects.count() == 0 + assert Pull.objects.count() == 0 + assert Repository.objects.count() == 0 + + +@pytest.mark.django_db(transaction=True) +def test_delete_owner_from_orgs_removes_ownerid_from_organizations_of_related_owners( + mock_storage, +): + org = OwnerFactory() + + user_1 = OwnerFactory(organizations=[org.ownerid]) + user_2 = OwnerFactory(organizations=[org.ownerid, user_1.ownerid]) + + res = DeleteOwnerTask().run_impl({}, org.ownerid) + + assert res.summary[Owner] == CleanupResult(1) + + user_1 = Owner.objects.get(pk=user_1.ownerid) + assert user_1.organizations == [] + user_2 = Owner.objects.get(pk=user_2.ownerid) + assert user_2.organizations == [user_1.ownerid] diff --git a/tasks/tests/unit/test_flush_repo.py b/tasks/tests/unit/test_flush_repo.py index 291edfcfc..a6676d7f3 100644 --- a/tasks/tests/unit/test_flush_repo.py +++ b/tasks/tests/unit/test_flush_repo.py @@ -1,118 +1,140 @@ -from database.tests.factories import ( +import pytest +from shared.bundle_analysis import StoragePaths +from shared.django_apps.compare.models import CommitComparison, FlagComparison +from shared.django_apps.compare.tests.factories import ( + CommitComparisonFactory, + FlagComparisonFactory, +) +from shared.django_apps.core.models import Branch, Commit, Pull, Repository +from shared.django_apps.core.tests.factories import ( BranchFactory, CommitFactory, - CompareCommitFactory, PullFactory, RepositoryFactory, ) -from database.tests.factories.reports import CompareFlagFactory, RepositoryFlagFactory +from shared.django_apps.reports.models import CommitReport, RepositoryFlag +from shared.django_apps.reports.models import ReportSession as Upload +from shared.django_apps.reports.tests.factories import ( + CommitReportFactory, + RepositoryFlagFactory, + UploadFactory, +) + from 
services.archive import ArchiveService -from tasks.flush_repo import FlushRepoTask, FlushRepoTaskReturnType - - -class TestFlushRepo(object): - def test_flush_repo_nothing(self, dbsession, mock_storage): - task = FlushRepoTask() - repo = RepositoryFactory.create() - dbsession.add(repo) - dbsession.flush() - res = task.run_impl(dbsession, repoid=repo.repoid) - assert res == FlushRepoTaskReturnType( - **{ - "delete_branches_count": 0, - "deleted_archives_count": 0, - "deleted_commits_count": 0, - "deleted_pulls_count": 0, - } - ) +from services.cleanup.utils import CleanupResult, CleanupSummary +from tasks.flush_repo import FlushRepoTask - def test_flush_repo_few_of_each_only_db_objects(self, dbsession, mock_storage): - task = FlushRepoTask() - repo = RepositoryFactory.create() - dbsession.add(repo) - dbsession.flush() - flag = RepositoryFlagFactory.create(repository=repo) - dbsession.add(flag) - for i in range(8): - commit = CommitFactory.create(repository=repo) - dbsession.add(commit) - for i in range(4): - base_commit = CommitFactory.create(repository=repo) - head_commit = CommitFactory.create(repository=repo) - comparison = CompareCommitFactory.create( - base_commit=base_commit, compare_commit=head_commit - ) - dbsession.add(base_commit) - dbsession.add(head_commit) - dbsession.add(comparison) - - flag_comparison = CompareFlagFactory.create( - commit_comparison=comparison, repositoryflag=flag - ) - dbsession.add(flag_comparison) - for i in range(17): - pull = PullFactory.create(repository=repo, pullid=i + 100) - dbsession.add(pull) - for i in range(23): - branch = BranchFactory.create(repository=repo) - dbsession.add(branch) - dbsession.flush() - res = task.run_impl(dbsession, repoid=repo.repoid) - assert res == FlushRepoTaskReturnType( - **{ - "delete_branches_count": 23, - "deleted_archives_count": 0, - "deleted_commits_count": 16, - "deleted_pulls_count": 17, - } - ) - def test_flush_repo_only_archives(self, dbsession, mock_storage): - repo = 
RepositoryFactory.create() - dbsession.add(repo) - dbsession.flush() - archive_service = ArchiveService(repo) - for i in range(4): - archive_service.write_chunks(f"commit_sha{i}", f"data{i}") - task = FlushRepoTask() - res = task.run_impl(dbsession, repoid=repo.repoid) - assert res == FlushRepoTaskReturnType( - **{ - "delete_branches_count": 0, - "deleted_archives_count": 4, - "deleted_commits_count": 0, - "deleted_pulls_count": 0, - } +@pytest.mark.django_db +def test_flush_repo_nothing(mock_storage): + repo = RepositoryFactory() + + task = FlushRepoTask() + res = task.run_impl({}, repoid=repo.repoid) + + assert res == CleanupSummary( + CleanupResult(1), + { + Repository: CleanupResult(1), + }, + ) + + +@pytest.mark.django_db +def test_flush_repo_few_of_each_only_db_objects(mock_storage): + repo = RepositoryFactory() + flag = RepositoryFlagFactory(repository=repo) + + for i in range(8): + CommitFactory(repository=repo) + + for i in range(4): + base_commit = CommitFactory(repository=repo) + head_commit = CommitFactory(repository=repo) + comparison = CommitComparisonFactory( + base_commit=base_commit, compare_commit=head_commit ) - def test_flush_repo_little_bit_of_everything(self, dbsession, mock_storage): - repo = RepositoryFactory.create() - dbsession.add(repo) - dbsession.flush() - archive_service = ArchiveService(repo) - for i in range(8): - commit = CommitFactory.create(repository=repo) - dbsession.add(commit) - for i in range(17): - pull = PullFactory.create(repository=repo, pullid=i + 100) - dbsession.add(pull) - for i in range(23): - branch = BranchFactory.create(repository=repo) - dbsession.add(branch) - dbsession.flush() - for i in range(4): - archive_service.write_chunks(f"commit_sha{i}", f"data{i}") - task = FlushRepoTask() - res = task.run_impl(dbsession, repoid=repo.repoid) - assert res == FlushRepoTaskReturnType( - **{ - "delete_branches_count": 23, - "deleted_archives_count": 4, - "deleted_commits_count": 8, - "deleted_pulls_count": 17, - } + 
FlagComparisonFactory(commit_comparison=comparison, repositoryflag=flag) + + # NOTE: The `CommitFactory` defaults to `branch: main, pullid: 1` + # This default seems to create models for + # `Pull` and `Branch` automatically through some kind of trigger? + + for i in range(17): + PullFactory(repository=repo, pullid=i + 100) + + for i in range(23): + BranchFactory(repository=repo) + + task = FlushRepoTask() + res = task.run_impl({}, repoid=repo.repoid) + + assert res == CleanupSummary( + CleanupResult(24 + 16 + 4 + 4 + 18 + 1 + 1), + { + Branch: CleanupResult(24), + Commit: CleanupResult(16), + CommitComparison: CleanupResult(4), + FlagComparison: CleanupResult(4), + Pull: CleanupResult(18), + Repository: CleanupResult(1), + RepositoryFlag: CleanupResult(1), + }, + ) + + +@pytest.mark.django_db +def test_flush_repo_little_bit_of_everything(mocker, mock_storage): + repo = RepositoryFactory() + archive_service = ArchiveService(repo) + + for i in range(8): + # NOTE: `CommitWithReportFactory` exists, but it's only usable from `api`, + # because of unresolved imports + commit = CommitFactory(repository=repo) + commit_report = CommitReportFactory(commit=commit) + upload = UploadFactory(report=commit_report, storage_path=f"upload{i}") + + archive_service.write_chunks(commit.commitid, f"chunks_data{i}") + archive_service.write_file(upload.storage_path, f"upload_data{i}") + + ba_report = CommitReportFactory(commit=commit, report_type="bundle_analysis") + ba_upload = UploadFactory(report=ba_report, storage_path=f"ba_upload{i}") + ba_report_path = StoragePaths.bundle_report.path( + repo_key=archive_service.storage_hash, report_key=ba_report.external_id ) - dbsession.flush() - dbsession.refresh(repo) - # Those assertions are almost tautological. 
If they start being a - # problem, don't hesitate to delete them + archive_service.storage.write_file( + "bundle-analysis", ba_report_path, f"ba_report_data{i}" + ) + archive_service.storage.write_file( + "bundle-analysis", ba_upload.storage_path, f"ba_upload_data{i}" + ) + + for i in range(17): + PullFactory(repository=repo, pullid=i + 100) + + for i in range(23): + BranchFactory(repository=repo) + + archive = mock_storage.storage["archive"] + ba_archive = mock_storage.storage["bundle-analysis"] + assert len(archive) == 16 + assert len(ba_archive) == 16 + + task = FlushRepoTask() + res = task.run_impl({}, repoid=repo.repoid) + + assert res == CleanupSummary( + CleanupResult(24 + 8 + 16 + 18 + 1 + 16, 16 + 16), + { + Branch: CleanupResult(24), + Commit: CleanupResult(8), + CommitReport: CleanupResult(16, 16), + Pull: CleanupResult(18), + Repository: CleanupResult(1), + Upload: CleanupResult(16, 16), + }, + ) + assert len(archive) == 0 + assert len(ba_archive) == 0