Skip to content

Commit 73e4a5a

Browse files
committed
Try to GET LRCLib lyrics before searching
1 parent e89d718 commit 73e4a5a

File tree

3 files changed

+123
-56
lines changed

3 files changed

+123
-56
lines changed

beetsplug/lyrics.py

+107-49
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,11 @@
2525
import struct
2626
import unicodedata
2727
import warnings
28-
from functools import partial
29-
from typing import TYPE_CHECKING, ClassVar
28+
from contextlib import suppress
29+
from dataclasses import dataclass
30+
from functools import cached_property, partial, total_ordering
31+
from http import HTTPStatus
32+
from typing import TYPE_CHECKING, ClassVar, Iterable, Iterator
3033
from urllib.parse import quote, urlencode
3134

3235
import requests
@@ -98,6 +101,10 @@
98101
"""
99102

100103

104+
class NotFoundError(requests.exceptions.HTTPError):
    """HTTP error signalling that the requested resource was not found."""
106+
107+
101108
# Utilities.
102109

103110

@@ -276,14 +283,80 @@ class LRCLibItem(TypedDict):
276283
trackName: str
277284
artistName: str
278285
albumName: str
279-
duration: float
286+
duration: float | None
280287
instrumental: bool
281288
plainLyrics: str
282289
syncedLyrics: str | None
283290

284291

292+
@dataclass
@total_ordering
class LRCLyrics:
    """A single LRCLib lyrics candidate scored against a target duration.

    Instances are ordered by :attr:`dist`, so ``min()`` over a collection
    of candidates yields the best match.
    """

    #: Percentage tolerance for max duration difference between lyrics and item.
    DURATION_DIFF_TOLERANCE = 0.05

    # Duration of the item the lyrics are being fetched for.
    target_duration: float
    # Duration reported for the lyrics candidate (0.0 when it is missing).
    duration: float
    # Whether the candidate is flagged as an instrumental track.
    instrumental: bool
    # Plain (unsynced) lyrics text.
    plain: str
    # Synced lyrics text, if the candidate provides any.
    synced: str | None

    def __lt__(self, other: LRCLyrics) -> bool:
        """Return whether this candidate is a strictly better match.

        ``total_ordering`` derives the remaining comparisons from this
        method together with the dataclass-generated ``__eq__``. The strict
        comparison must therefore live in ``__lt__``: defining it as
        ``__le__`` makes ``x <= x`` false and breaks the derived operators.
        """
        if not isinstance(other, LRCLyrics):
            return NotImplemented

        return self.dist < other.dist

    @classmethod
    def make(cls, candidate: LRCLibItem, target_duration: float) -> LRCLyrics:
        """Build a lyrics object from a raw LRCLib API item.

        A missing (``None``) candidate duration is stored as ``0.0``.
        """
        return cls(
            target_duration,
            candidate["duration"] or 0.0,
            candidate["instrumental"],
            candidate["plainLyrics"],
            candidate["syncedLyrics"],
        )

    @cached_property
    def duration_dist(self) -> float:
        """Return the absolute difference between lyrics and target duration."""
        return abs(self.duration - self.target_duration)

    @cached_property
    def is_valid(self) -> bool:
        """Return whether the lyrics item is valid.

        Lyrics duration must be within the tolerance defined by
        :attr:`DURATION_DIFF_TOLERANCE`, taken as a fraction of
        ``target_duration``.
        """
        return (
            self.duration_dist
            <= self.target_duration * self.DURATION_DIFF_TOLERANCE
        )

    @cached_property
    def dist(self) -> tuple[float, bool]:
        """Distance/score of the given lyrics item (lower is better).

        Return a tuple with the following values:
        1. Absolute difference between lyrics and target duration
        2. Boolean telling whether synced lyrics are *missing*.

        Best lyrics match is the one that has the closest duration to
        ``target_duration`` and has synced lyrics available: ``False`` sorts
        before ``True``, so among equally close candidates the synced one
        wins.
        """
        return self.duration_dist, not self.synced

    def get_text(self, want_synced: bool) -> str:
        """Return the lyrics text to store for this candidate.

        Instrumental candidates yield ``INSTRUMENTAL_LYRICS``. Otherwise the
        synced lyrics are returned when ``want_synced`` is set and they are
        available, and the plain lyrics in every other case.
        """
        if self.instrumental:
            return INSTRUMENTAL_LYRICS

        return self.synced if want_synced and self.synced else self.plain
352+
353+
285354
class LRCLib(Backend):
286-
base_url = "https://lrclib.net/api/search"
355+
"""Fetch lyrics from the LRCLib API."""
356+
357+
BASE_URL = "https://lrclib.net/api"
358+
GET_URL = f"{BASE_URL}/get"
359+
SEARCH_URL = f"{BASE_URL}/search"
287360

288361
def warn(self, message: str, *args) -> None:
289362
"""Log a warning message with the class name."""
@@ -294,69 +367,54 @@ def fetch_json(self, *args, **kwargs):
294367
kwargs.setdefault("timeout", 10)
295368
kwargs.setdefault("headers", {"User-Agent": USER_AGENT})
296369
r = requests.get(*args, **kwargs)
370+
if r.status_code == HTTPStatus.NOT_FOUND:
371+
raise NotFoundError("HTTP Error: Not Found", response=r)
297372
r.raise_for_status()
298373

299374
return r.json()
300375

301-
@staticmethod
302-
def get_rank(
303-
target_duration: float, item: LRCLibItem
304-
) -> tuple[float, bool]:
305-
"""Rank the given lyrics item.
376+
def fetch_candidates(
377+
self, artist: str, title: str, album: str, length: int
378+
) -> Iterator[list[LRCLibItem]]:
379+
"""Yield lyrics candidates for the given song data.
306380
307-
Return a tuple with the following values:
308-
1. Absolute difference between lyrics and target duration
309-
2. Boolean telling whether synced lyrics are available.
381+
Firstly, attempt to GET lyrics directly, and then search the API if
382+
lyrics are not found or the duration does not match.
383+
384+
Return an iterator over lists of candidates.
310385
"""
311-
return (
312-
abs(item["duration"] - target_duration),
313-
not item["syncedLyrics"],
314-
)
386+
base_params = {"artist_name": artist, "track_name": title}
387+
get_params = {**base_params, "duration": length}
388+
if album:
389+
get_params["album_name"] = album
315390

316-
@classmethod
317-
def pick_lyrics(
318-
cls, target_duration: float, data: list[LRCLibItem]
319-
) -> LRCLibItem:
320-
"""Return best matching lyrics item from the given list.
391+
with suppress(NotFoundError):
392+
yield [self.fetch_json(self.GET_URL, params=get_params)]
321393

322-
Best lyrics match is the one that has the closest duration to
323-
``target_duration`` and has synced lyrics available.
394+
yield self.fetch_json(self.SEARCH_URL, params=base_params)
324395

325-
Note that the incoming list is guaranteed to be non-empty.
326-
"""
327-
return min(data, key=lambda item: cls.get_rank(target_duration, item))
396+
@classmethod
def pick_best_match(cls, lyrics: Iterable[LRCLyrics]) -> LRCLyrics | None:
    """Select the smallest (best-scoring) valid candidate.

    Candidates whose ``is_valid`` attribute is falsy are discarded first;
    ``None`` is returned when no candidate remains.
    """
    valid_candidates = [candidate for candidate in lyrics if candidate.is_valid]
    if not valid_candidates:
        return None

    return min(valid_candidates)
328400

329401
def fetch(
330402
self, artist: str, title: str, album: str, length: int
331403
) -> str | None:
332-
"""Fetch lyrics for the given artist, title, and album."""
333-
params: dict[str, str | int] = {
334-
"artist_name": artist,
335-
"track_name": title,
336-
}
337-
if album:
338-
params["album_name"] = album
339-
340-
if length:
341-
params["duration"] = length
342-
404+
"""Fetch lyrics text for the given song data."""
405+
fetch = partial(self.fetch_candidates, artist, title, album, length)
406+
make = partial(LRCLyrics.make, target_duration=length)
407+
pick = self.pick_best_match
343408
try:
344-
data = self.fetch_json(self.base_url, params=params)
409+
return next(
410+
filter(None, map(pick, (map(make, x) for x in fetch())))
411+
).get_text(self.config["synced"])
412+
except StopIteration:
413+
pass
345414
except requests.JSONDecodeError:
346415
self.warn("Could not decode response JSON data")
347416
except requests.RequestException as exc:
348417
self.warn("Request error: {}", exc)
349-
else:
350-
if data:
351-
item = self.pick_lyrics(length, data)
352-
353-
if item["instrumental"]:
354-
return INSTRUMENTAL_LYRICS
355-
356-
if self.config["synced"] and (synced := item["syncedLyrics"]):
357-
return synced
358-
359-
return item["plainLyrics"]
360418

361419
return None
362420

docs/changelog.rst

+6-5
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,12 @@ Bug fixes:
5454
* :doc:`plugins/lyrics`: Do not attempt to search for lyrics if either the
5555
artist or title is missing and ignore ``artist_sort`` value if it is empty.
5656
:bug:`2635`
57-
* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. Instead of
58-
attempting to fetch lyrics for a specific album, artist, title and duration
59-
combination, the plugin now performs a search which yields many results.
60-
Update the default ``sources`` configuration to prioritize ``lrclib`` over
61-
other sources since it returns reliable results quicker than others.
57+
* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. If we
58+
cannot find lyrics for a specific album, artist, title combination, the
59+
plugin now tries to search for the artist and title and picks the most
60+
relevant result. Update the default ``sources`` configuration to prioritize
61+
``lrclib`` over other sources since it returns reliable results quicker than
62+
others.
6263
:bug:`5102`
6364

6465
For packagers:

test/plugins/test_lyrics.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,8 @@ def request_kwargs(self, response_data):
368368

369369
@pytest.fixture
370370
def fetch_lyrics(self, backend, requests_mock, request_kwargs):
371-
requests_mock.get(backend.base_url, **request_kwargs)
371+
requests_mock.get(backend.GET_URL, status_code=HTTPStatus.NOT_FOUND)
372+
requests_mock.get(backend.SEARCH_URL, **request_kwargs)
372373

373374
return partial(backend.fetch, "la", "la", "la", self.ITEM_DURATION)
374375

@@ -385,7 +386,14 @@ def test_synced_config_option(self, fetch_lyrics, expected_lyrics):
385386
[
386387
pytest.param([], None, id="handle non-matching lyrics"),
387388
pytest.param(
388-
[lyrics_match()], "synced", id="synced when available"
389+
[lyrics_match()],
390+
"synced",
391+
id="synced when available",
392+
),
393+
pytest.param(
394+
[lyrics_match(duration=1)],
395+
None,
396+
id="none: duration too short",
389397
),
390398
pytest.param(
391399
[lyrics_match(instrumental=True)],

0 commit comments

Comments
 (0)