diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3b681637..3aa4155a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,14 +4,33 @@ Changelog (Unreleased) ~~~~~~~~~~~~ -* Drop Django 2.2 support. +* **Backwards incompatible:** Tag slugification used to silently strip non-ASCII characters + from the tag name to make the slug. This leads to a lot of confusion for anyone using + languages with non-latin alphabets, as well as weird performance issues. + + Tag slugification will now, by default, maintain unicode characters as-is during + slugification. This will lead to less surprises, but might cause issues for you if you are + expecting all of your tag slugs to fit within a regex like ``[a-zA-Z0-9]`` (for example in + URL routing configurations). + + Generally speaking, this should not require action on your part as a library user, as + existing tag slugs are persisted in the database, and only new tags will receive the + enhanced unicode-compatible slug. + + If you wish to maintain the old stripping behavior, set the setting + ``TAGGIT_STRIP_UNICODE_WHEN_SLUGIFYING`` to ``True``. + + As a reminder, custom tag models can easily customize slugification behavior by overriding + the ``slugify`` method to your business needs. + +`` Drop Django 2.2 support. 2.1.0 (2022-01-24) ~~~~~~~~~~~~~~~~~~ -* Add Python 3.10 support. -* Add Django 4.0 support. -* Drop Django 3.1 support. +`` Add Python 3.10 support. +`` Add Django 4.0 support. +`` Drop Django 3.1 support. 2.0.0 (2021-11-14) @@ -23,7 +42,7 @@ Changelog - previously: ``item.tags.set("red", "blue")`` - now: ``item.tags.set(["red", "blue"])`` -* Fix issue where ``TagField`` would incorrectly report that a field has changed on empty values. +** Fix issue where ``TagField`` would incorrectly report that a field has changed on empty values. * Update Russian translation. * Add Persian translation * Fix issue for many languages where content types were not being properly translated. diff --git a/docs/faq.rst b/docs/faq.rst index d4cd3f10..f8f9b19a 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -13,7 +13,8 @@ Frequently Asked Questions One way to handle this is with post-generation hooks:: - class ProductFactory(DjangoModelFactory): + + class ProductFactory(DjangoModelFactory): # Rest of the stuff @post_generation diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 0589b2af..a641bb05 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -27,3 +27,23 @@ And then to any model you want tagging on do the following:: If you want ``django-taggit`` to be **CASE-INSENSITIVE** when looking up existing tags, you'll have to set ``TAGGIT_CASE_INSENSITIVE`` (in ``settings.py`` or wherever you have your Django settings) to ``True`` (``False`` by default):: TAGGIT_CASE_INSENSITIVE = True + + +Settings +-------- + +The following Django-level settings affect the behavior of the library + +* ``TAGGIT_CASE_INSENSITIVE`` + + When set to ``True``, tag lookups will be case insensitive. This defaults to ``False``. + +`` ``TAGGIT_STRIP_UNICODE_WHEN_SLUGIFYING`` + When this is set to ``True``, tag slugs will be limited to ASCII characters. In this case, if you also have ```unidecode`` installed, + then tag sluggification will transform a tag like ``あい うえお`` to ``ai-ueo``. + If you do not have ``unidecode`` installed, then you will usually be outright stripping unicode, meaning that something like ``helloあい`` will be slugified as ``hello``. + + This value defaults to ``False``, meaning that unicode is preserved in slugification. + + Because the behavior when ``True`` is set leads to situations where + slugs can be entirely stripped to an empty string, we recommend not activating this. diff --git a/taggit/migrations/0005_auto_20220424_2025.py b/taggit/migrations/0005_auto_20220424_2025.py new file mode 100644 index 00000000..c64659da --- /dev/null +++ b/taggit/migrations/0005_auto_20220424_2025.py @@ -0,0 +1,20 @@ +# Generated by Django 2.2.26 on 2022-04-24 20:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("taggit", "0004_alter_taggeditem_content_type_alter_taggeditem_tag"), + ] + + operations = [ + migrations.AlterField( + model_name="tag", + name="slug", + field=models.SlugField( + allow_unicode=True, max_length=100, unique=True, verbose_name="slug" + ), + ), + ] diff --git a/taggit/models.py b/taggit/models.py index 1d457607..05bca49f 100644 --- a/taggit/models.py +++ b/taggit/models.py @@ -1,3 +1,4 @@ +from django.conf import settings from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.models import ContentType from django.db import IntegrityError, models, router, transaction @@ -19,7 +20,10 @@ class TagBase(models.Model): verbose_name=pgettext_lazy("A tag name", "name"), unique=True, max_length=100 ) slug = models.SlugField( - verbose_name=pgettext_lazy("A tag slug", "slug"), unique=True, max_length=100 + verbose_name=pgettext_lazy("A tag slug", "slug"), + unique=True, + max_length=100, + allow_unicode=True, ) def __str__(self): @@ -71,7 +75,10 @@ def save(self, *args, **kwargs): return super().save(*args, **kwargs) def slugify(self, tag, i=None): - slug = slugify(unidecode(tag)) + if getattr(settings, "TAGGIT_STRIP_UNICODE_WHEN_SLUGIFYING", False): + slug = slugify(unidecode(tag)) + else: + slug = slugify(tag, allow_unicode=True) if i is not None: slug += "_%d" % i return slug diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 00000000..b469c33d --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,27 @@ +from django.test import TestCase, override_settings + +from tests.models import TestModel + + +class TestSlugification(TestCase): + def test_unicode_slugs(self): + """ + Confirm the preservation of unicode in slugification by default + """ + sample_obj = TestModel.objects.create() + # a unicode tag will be slugified for space reasons but + # unicode-ness will be kept by default + sample_obj.tags.add("あい うえお") + self.assertEqual([tag.slug for tag in sample_obj.tags.all()], ["あい-うえお"]) + + def test_old_slugs(self): + """ + Test that the setting that gives us the old slugification behavior + is in place + """ + with override_settings(TAGGIT_STRIP_UNICODE_WHEN_SLUGIFYING=True): + sample_obj = TestModel.objects.create() + # a unicode tag will be slugified for space reasons but + # unicode-ness will be kept by default + sample_obj.tags.add("あい うえお") + self.assertEqual([tag.slug for tag in sample_obj.tags.all()], [""])