Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
translations: use a more distinctive separator
Browse files Browse the repository at this point in the history
I found that the translator would sometimes replace the pipe character
with another symbol (perhaps because it mistook the character for part
of the text).

Added spaces around the pipe to make it clearer that it is the
separator rather than part of the text.
snejus committed Jan 20, 2025

Verified

This commit was signed with the committer’s verified signature.
misak113 Michael Zabka
1 parent 9b62ee5 commit c5a7b94
Showing 2 changed files with 14 additions and 11 deletions.
7 changes: 5 additions & 2 deletions beetsplug/lyrics.py
Original file line number Diff line number Diff line change
@@ -747,6 +747,7 @@ def scrape(cls, html: str) -> str | None:
class Translator(RequestHandler):
TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")
SEPARATOR = " | "
remove_translations = partial(re.compile(r" / [^\n]+").sub, "")

_log: Logger
@@ -776,14 +777,16 @@ def get_translations(self, texts: Iterable[str]) -> list[tuple[str, str]]:
map the translations back to the original texts.
"""
unique_texts = list(dict.fromkeys(texts))
text = self.SEPARATOR.join(unique_texts)
data: list[TranslatorAPI.Response] = self.post_json(
self.TRANSLATE_URL,
headers={"Ocp-Apim-Subscription-Key": self.api_key},
json=[{"text": "|".join(unique_texts)}],
json=[{"text": text}],
params={"api-version": "3.0", "to": self.to_language},
)

translations = data[0]["translations"][0]["text"].split("|")
translated_text = data[0]["translations"][0]["text"]
translations = translated_text.split(self.SEPARATOR)
trans_by_text = dict(zip(unique_texts, translations))
return list(zip(texts, (trans_by_text.get(t, "") for t in texts)))

18 changes: 9 additions & 9 deletions test/plugins/test_lyrics.py
Original file line number Diff line number Diff line change
@@ -548,23 +548,23 @@ def callback(request, _):
if b"Refrain" in request.body:
translations = (
""
"|[Refrain : Doja Cat]"
"|Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501
"|Mon corps ne me laissait pas le cacher (Cachez-le)"
"|Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501
"|Chevauchant à travers le tonnerre, la foudre"
" | [Refrain : Doja Cat]"
" | Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)" # noqa: E501
" | Mon corps ne me laissait pas le cacher (Cachez-le)"
" | Quoi qu’il arrive, je ne plierais pas (Ne plierait pas, ne plierais pas)" # noqa: E501
" | Chevauchant à travers le tonnerre, la foudre"
)
elif b"00:00.00" in request.body:
translations = (
""
"|[00:00.00] Quelques paroles synchronisées"
"|[00:01.00] Quelques paroles plus synchronisées"
" | [00:00.00] Quelques paroles synchronisées"
" | [00:01.00] Quelques paroles plus synchronisées"
)
else:
translations = (
""
"|Quelques paroles synchronisées"
"|Quelques paroles plus synchronisées"
" | Quelques paroles synchronisées"
" | Quelques paroles plus synchronisées"
)

return [

0 comments on commit c5a7b94

Please sign in to comment.