14
14
15
15
"""Fetches, embeds, and displays lyrics."""
16
16
17
+ from __future__ import annotations
18
+
17
19
import difflib
18
20
import errno
19
21
import itertools
23
25
import struct
24
26
import unicodedata
25
27
import warnings
28
+ from functools import partial
26
29
from typing import ClassVar
27
30
from urllib .parse import quote , urlencode
28
31
47
50
48
51
import beets
49
52
from beets import plugins , ui
50
- from beets .autotag .hooks import string_dist
51
53
52
54
# Matches "<div" or "</div" (case-insensitive) with an optional trailing ">";
# the single capture group holds the "/" of a closing tag, or "" otherwise.
DIV_RE = re .compile (r"<(/?)div>?" , re .I )
53
55
# Matches HTML comments. re.S lets "." cross newlines, and because ".*" is
# greedy a single match runs from the first "<!--" to the last "-->".
COMMENT_RE = re .compile (r"<!--.*-->" , re .S )
@@ -288,7 +290,7 @@ class DirectBackend(Backend):
288
290
@classmethod
def encode(cls, text: str) -> str:
    """Encode the string for inclusion in a URL.

    This base implementation is intentionally abstract: each backend
    supplies its own encoding rules by overriding this method.

    Raises:
        NotImplementedError: always, when the method is not overridden.
    """
    raise NotImplementedError
292
294
293
295
@classmethod
294
296
def build_url (cls , * args : str ) -> str :
@@ -312,7 +314,7 @@ def encode(cls, text: str) -> str:
312
314
for old , new in cls .REPLACEMENTS .items ():
313
315
text = re .sub (old , new , text )
314
316
315
- return super (). encode (text )
317
+ return quote ( unidecode (text ) )
316
318
317
319
def fetch (self , artist , title , album = None , length = None ):
318
320
url = self .build_url (artist , title )
class Tekstowo(DirectBackend):
    """Fetch lyrics from Tekstowo.pl.

    The song page is addressed directly through ``URL_TEMPLATE`` rather than
    via a search query, so a lookup needs a single HTTP request.
    """

    REQUIRES_BS = True
    # Song page URL with two placeholders; ``fetch`` supplies the artist
    # first and the title second.
    URL_TEMPLATE = "https://www.tekstowo.pl/piosenka,{},{}.html"

    # Callable that replaces every non-word character (regex ``\W``) with "_".
    # A ``partial`` is not a descriptor, so it is not bound when accessed
    # through the class and receives only the text argument.
    non_alpha_to_underscore = partial(re.compile(r"\W").sub, "_")

    @classmethod
    def encode(cls, text: str) -> str:
        """Return ``text`` converted to a Tekstowo URL path component.

        The text is lowercased, passed through ``unidecode``, and every
        remaining non-word character is replaced by an underscore.
        """
        return cls.non_alpha_to_underscore(unidecode(text.lower()))

    def fetch(self, artist, title, album=None, length=None):
        """Fetch the lyrics for ``artist`` / ``title``.

        ``album`` and ``length`` are accepted for signature compatibility
        and are not used here.

        Returns the extracted lyrics, or ``None`` when the page could not be
        fetched or contains no lyrics block.
        """
        # NOTE(review): build_url is inherited; presumably it encodes each
        # argument and fills URL_TEMPLATE - confirm against DirectBackend.
        if html := self.fetch_url(self.build_url(artist, title)):
            return self.extract_lyrics(html)

        return None

    def extract_lyrics(self, html: str) -> str | None:
        """Extract the lyrics text from a song page.

        Returns the text of the ``div.song-text > div.inner-text`` element,
        or ``None`` when the page cannot be parsed or has no such element.
        """
        html = _scrape_strip_cruft(html)
        html = _scrape_merge_paragraphs(html)

        soup = try_parse_html(html)
        # try_parse_html may yield a falsy result when parsing fails
        # (TODO confirm against its definition); bail out here instead of
        # raising AttributeError on the select_one call below.
        if not soup:
            return None

        if lyrics_div := soup.select_one("div.song-text > div.inner-text"):
            return lyrics_div.get_text()

        return None
568
514
569
515
570
516
def remove_credits (text ):
0 commit comments