Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

531: List duplicated words on dashboard #552

Draft
wants to merge 1 commit into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lunes_cms/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def find_duplicates_for_word(request, word):
if duplicate.definition
else _("Definition: ") + _("No definition is provided for this word.")
),
"training_sets": _("Training sets: ") + training_sets_description,
"training_sets": _("Training sets") + ": " + training_sets_description,
}

return JsonResponse(result)
Expand Down
40 changes: 31 additions & 9 deletions lunes_cms/cms/admin.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,44 @@
"""
Register models for Django's CRUD back end and
Register models for Django's CRUD back end and
specify autocomplete_fields, search_fields and nested modules
"""
from __future__ import absolute_import, unicode_literals

from django.contrib import admin
from django.utils.translation import gettext_lazy as _

from .admins import (
DisciplineAdmin,
DocumentAdmin,
FeedbackAdmin,
GroupAPIKeyAdmin,
SponsorAdmin,
TrainingSetAdmin,
)
from .admin.discipline import DisciplineAdmin
from .admin.document import DocumentAdmin
from .admin.document.duplicates import get_duplicate_vocabularies
from .admin.feedback import FeedbackAdmin
from .admin.group_api_key import GroupAPIKeyAdmin
from .admin.sponsor import SponsorAdmin
from .admin.training_set import TrainingSetAdmin
from .models import Discipline, Document, Feedback, GroupAPIKey, Sponsor, TrainingSet


# Keep a handle on Django's own implementation before this module replaces it
# on ``admin.AdminSite`` further down, so the override can delegate to it
# instead of carrying a copy of its body.
_django_each_context = admin.AdminSite.each_context


def each_context(self, request):
    """
    Return a dictionary of variables to put in the template context for
    *every* page in the admin site.

    The context is built by Django's original ``AdminSite.each_context`` and
    extended with the duplicated vocabularies. Delegating (rather than
    re-implementing the method) keeps every key the installed Django version
    provides, including keys this project does not know about.

    :param request: current user request
    :type request: django.http.request
    :return: Django's default admin context plus the key
             ``duplicate_vocabularies``
    :rtype: dict
    """
    context = _django_each_context(self, request)
    context["duplicate_vocabularies"] = get_duplicate_vocabularies()
    return context


def get_app_list(self, request):
"""
Function that returns a sorted list of all the installed apps that have been
Expand Down Expand Up @@ -54,6 +75,7 @@ def get_app_list(self, request):
return app_list


admin.AdminSite.each_context = each_context
admin.AdminSite.get_app_list = get_app_list
admin.site.register(Discipline, DisciplineAdmin)
admin.site.register(TrainingSet, TrainingSetAdmin)
Expand Down
Empty file added lunes_cms/cms/admin/__init__.py
Empty file.
9 changes: 9 additions & 0 deletions lunes_cms/cms/admin/discipline/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .discipline_admin import DisciplineAdmin
from .form import DisciplineChoiceField
from .list_filter import DisciplineListFilter

__all__ = [
"DisciplineAdmin",
"DisciplineChoiceField",
"DisciplineListFilter",
]
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from mptt.admin import DraggableMPTTAdmin
from tablib import Dataset

from ..models import Discipline, Document, Static
from .document_resource import DocumentResource
from ..document import DocumentResource
from ...models import Discipline, Document, Static


class DisciplineAdmin(DraggableMPTTAdmin):
Expand Down
17 changes: 17 additions & 0 deletions lunes_cms/cms/admin/discipline/form.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from django import forms


class DisciplineChoiceField(forms.ModelMultipleChoiceField):
    """
    Multiple choice form field whose option labels include the parent nodes
    of each object.
    Inherits from `forms.ModelMultipleChoiceField`.
    """

    def label_from_instance(self, obj):
        """
        Build the label displayed for a single choice.

        :param obj: object to create the label for; its ``parent`` and
                    ``title`` attributes are read
        :return: ``obj.title`` if the object has no parent, otherwise the
                 titles returned by
                 ``obj.parent.get_ancestors(include_self=True)`` followed by
                 ``obj.title``, joined by arrows
        """
        if not obj.parent:
            return obj.title

        path = [
            ancestor.title
            for ancestor in obj.parent.get_ancestors(include_self=True)
        ]
        return " \u2794 ".join(path + [obj.title])
70 changes: 70 additions & 0 deletions lunes_cms/cms/admin/discipline/list_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from django.contrib import admin
from django.db.models import F
from django.utils.translation import gettext_lazy as _

from ...models import Discipline


class DisciplineListFilter(admin.SimpleListFilter):
    """
    Generic Filter for models that have a direct relationship to disciplines.
    Inherits from `admin.SimpleListFilter`.
    """

    title = _("disciplines")

    # Parameter for the filter that will be used in the URL query.
    parameter_name = "disciplines"

    template = "admin/discipline_filter.html"

    def lookups(self, request, model_admin):
        """
        Defining look up values that can be seen in the admin
        interface. Returns tuples: the first element is a coded
        value, whereas the second one is human-readable.

        :param request: current user request
        :type request: django.http.request
        :param model_admin: admin of current model
        :type model_admin: ModelAdmin
        :return: list of tuples containing id and label of each discipline,
                 sorted by label
        :rtype: list
        """

        # Verify that only disciplines are displayed that actually can contain training sets
        queryset = Discipline.objects.filter(lft=F("rght") - 1)

        if "training set" in request.GET:
            queryset = queryset.filter(training_sets=request.GET["training set"])

        if request.user.is_superuser:
            queryset = queryset.filter(creator_is_admin=True)
        else:
            queryset = queryset.filter(created_by__in=request.user.groups.all())

        list_of_disciplines = []
        # The label needs the parent of every row, so fetch it in the same
        # query instead of issuing one extra query per discipline.
        for discipline in queryset.select_related("parent"):
            if discipline.parent is None:
                # A discipline without parent would otherwise be rendered
                # as "None ➔ <title>".
                label = str(discipline)
            else:
                label = f"{discipline.parent} \u2794 {discipline}"
            list_of_disciplines.append((str(discipline.id), label))
        return sorted(list_of_disciplines, key=lambda tp: tp[1])

    def queryset(self, request, queryset):
        """
        Returns the filtered queryset based on the value
        provided in the query string and retrievable via
        `self.value()`.

        :param request: current user request
        :type request: django.http.request
        :param queryset: current queryset
        :type queryset: QuerySet
        :return: filtered queryset based on the value provided in the query string
        :rtype: QuerySet
        """
        if self.value():
            return queryset.filter(discipline=self.value()).distinct()
        return queryset
17 changes: 17 additions & 0 deletions lunes_cms/cms/admin/document/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from .document_admin import DocumentAdmin
from .document_resource import DocumentResource
from .list_filter import (
ApprovedImageListFilter,
AssignedListFilter,
DocumentDisciplineListFilter,
DocumentTrainingSetListFilter,
)

__all__ = [
"ApprovedImageListFilter",
"AssignedListFilter",
"DocumentAdmin",
"DocumentResource",
"DocumentDisciplineListFilter",
"DocumentTrainingSetListFilter",
]
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from django.contrib import admin

from ..models import AlternativeWord
from ...models import AlternativeWord


class AlternativeWordAdmin(admin.StackedInline):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
from django.db.models import Case, Exists, IntegerField, OuterRef, Value, When
from django.utils.translation import gettext_lazy as _

from ..list_filter import (
from .list_filter import (
ApprovedImageListFilter,
AssignedListFilter,
DocumentDisciplineListFilter,
DocumentTrainingSetListFilter,
AssignedListFilter,
)
from ..models import DocumentImage, Static
from .alternative_word_admin import AlternativeWordAdmin
from .document_image_admin import DocumentImageAdmin
from ...models import DocumentImage, Static

SUPERUSER_ONLY_LIST_FILTERS = [ApprovedImageListFilter]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from django.contrib import admin

from ..models import DocumentImage
from ...models import DocumentImage


class DocumentImageAdmin(admin.StackedInline):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from import_export import fields, resources
from import_export.admin import ExportActionMixin

from ..models import Document
from ..models.static import Static
from ...models import Document
from ...models.static import Static


class DocumentResource(resources.ModelResource):
Expand Down
5 changes: 5 additions & 0 deletions lunes_cms/cms/admin/document/duplicates/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .get_duplicates import get_duplicate_vocabularies

__all__ = [
"get_duplicate_vocabularies",
]
79 changes: 79 additions & 0 deletions lunes_cms/cms/admin/document/duplicates/get_duplicates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from collections import defaultdict
from typing import TypedDict, List

from django.db.models import Count
from django.db.models.functions import Lower

from lunes_cms.cms.models import Document


class DuplicateVocabulary(TypedDict):
    """
    Dictionary type representing a duplicate vocabulary group
    """

    # Word of the first document in the group
    word: str
    # Word type shared by all documents of the group
    word_type: str
    # Mapped documents of the group. These are plain dictionaries, not model
    # instances; the name is a forward reference because the type is declared
    # further down in this module.
    documents: List["DuplicateVocabularyDocument"]


class DuplicateVocabularyDocument(TypedDict):
    """
    Dictionary type describing a single document that belongs to a
    duplicate vocabulary group
    """

    # Identifier of the document
    # NOTE(review): filled from ``document.id`` - confirm that it really is a str
    id: str
    # Word of the document
    word: str
    # Titles of the training sets of the document
    training_sets: List[str]


def map_to_duplicate_vocabulary_documents(
    documents,
) -> "List[DuplicateVocabularyDocument]":
    """
    Maps the documents of a duplicate vocabulary group to
    DuplicateVocabularyDocument dictionaries

    :param documents: documents to map; of each one ``id``, ``word`` and the
                      related ``training_sets`` are read
    :type documents: iterable
    :return: one dictionary per document containing its id, its word and the
             titles of its training sets
    :rtype: list
    """
    return [
        {
            "id": document.id,
            "word": document.word,
            # Read the relation through its manager instead of reaching into
            # the private prefetch cache: ``all()`` is served from that cache
            # when the caller used ``prefetch_related("training_sets")`` and
            # still works (with a query) when it did not.
            "training_sets": [
                training_set.title for training_set in document.training_sets.all()
            ],
        }
        for document in documents
    ]


def get_duplicate_vocabularies() -> List[DuplicateVocabulary]:
    """
    Retrieves duplicate vocabularies from the database

    Documents are duplicates of each other if they have the same word type
    and the same word, where the words are compared in lower case.

    :return: one entry per group of duplicates, containing the word and the
             word type of the first document of the group as well as all
             mapped documents of the group
    :rtype: list
    """
    # All (lower case word, word type) combinations used by more than one document
    duplicate_keys = {
        (group["lower_word"], group["word_type"])
        for group in Document.objects.annotate(lower_word=Lower("word"))
        .values("lower_word", "word_type")
        .annotate(count=Count("id"))
        .filter(count__gt=1)
    }
    if not duplicate_keys:
        return []

    # The two ``__in`` conditions are independent of each other, so this
    # query also returns documents that combine a duplicated word with the
    # word type of a *different* duplicate group. It is only a pre-selection;
    # the exact combination is checked below.
    candidates = (
        Document.objects.annotate(lower_word=Lower("word"))
        .filter(
            lower_word__in=list({word for word, _word_type in duplicate_keys}),
            word_type__in=list({word_type for _word, word_type in duplicate_keys}),
        )
        .prefetch_related("training_sets")
    )

    grouped_duplicate_vocabularies = defaultdict(list)
    for document in candidates:
        # Group by the value lowered in the database so that the key is
        # compared with exactly what the duplicate query above returned.
        key = (document.lower_word, document.word_type)
        if key in duplicate_keys:
            grouped_duplicate_vocabularies[key].append(document)

    return [
        {
            "word": documents[0].word,
            "word_type": documents[0].word_type,
            "documents": map_to_duplicate_vocabulary_documents(documents),
        }
        for documents in grouped_duplicate_vocabularies.values()
    ]
Loading