19
19
import re
20
20
from copy import deepcopy
21
21
from dataclasses import dataclass
22
- from functools import total_ordering
22
+ from functools import cached_property , total_ordering
23
23
from typing import (
24
24
TYPE_CHECKING ,
25
25
Any ,
37
37
)
38
38
39
39
from jellyfish import levenshtein_distance
40
+ from rich_tables .utils import make_console
40
41
from typing_extensions import Self
41
42
from unidecode import unidecode
42
43
52
53
53
54
log = logging .getLogger (__name__ )
54
55
56
+ console = make_console ()
57
+
55
58
56
59
# Classes used to represent candidate options.
57
60
class AttrDict (dict ):
@@ -420,21 +423,21 @@ def string_dist(str1: Optional[str], str2: Optional[str]) -> float:
420
423
return base_dist + penalty
421
424
422
425
423
class cached_classproperty:
    """A read-only, lazily computed class-level property.

    The wrapped getter receives the owning class (not an instance) and is
    invoked at most once per owning class. The result is stored on this
    descriptor, keyed by that class, and every later access through the
    class or any of its instances returns the stored value. Because the
    cache is keyed by owner, each subclass gets its own getter call and
    its own cached value.
    """

    def __init__(self, getter):
        self.getter = getter
        # Maps each owner class to the value returned by its getter call.
        self.cache = {}

    def __get__(self, instance, owner=None):
        # The descriptor protocol permits calling ``__get__`` with only an
        # instance; fall back to the instance's type in that case.
        if owner is None:
            owner = type(instance)

        if owner not in self.cache:
            self.cache[owner] = self.getter(owner)

        return self.cache[owner]
438
441
439
442
440
443
@total_ordering
@@ -448,7 +451,7 @@ def __init__(self):
448
451
self ._penalties = {}
449
452
self .tracks : Dict [TrackInfo , Distance ] = {}
450
453
451
- @LazyClassProperty
454
+ @cached_classproperty
452
455
def _weights (cls ) -> Dict [str , float ]: # noqa: N805
453
456
"""A dictionary from keys to floating-point weights."""
454
457
weights_view = config ["match" ]["distance_weights" ]
@@ -492,6 +495,7 @@ def raw_distance(self) -> float:
492
495
dist_raw += sum (penalty ) * self ._weights [key ]
493
496
return dist_raw
494
497
498
+ @property
495
499
def items (self ) -> List [Tuple [str , float ]]:
496
500
"""Return a list of (key, dist) pairs, with `dist` being the
497
501
weighted distance, sorted from highest to lowest. Does not
@@ -543,13 +547,13 @@ def __getitem__(self, key) -> float:
543
547
return 0.0
544
548
545
549
def __iter__(self) -> Iterator[Tuple[str, float]]:
    """Iterate over the (key, dist) pairs exposed by ``self.items``."""
    # ``items`` is a property in this revision, so it is read as an
    # attribute; calling it would fail because the value is not callable.
    return iter(self.items)


def __len__(self) -> int:
    """Return the number of (key, dist) pairs in ``self.items``."""
    return len(self.items)


def keys(self) -> List[str]:
    """Return the keys of ``self.items``, preserving their order."""
    return [key for key, _ in self.items]
553
557
554
558
def update (self , dist : "Distance" ):
555
559
"""Adds all the distance penalties from `dist`."""
@@ -679,6 +683,13 @@ class Match:
679
683
distance : Distance
680
684
info : AttrDict
681
685
686
@cached_classproperty
def disambig_fields(cls) -> Iterable[str]:
    """Field names read from ``config["match"][cls.disambig_fields_key]``.

    The configuration view is converted with ``as_str_seq()``; the
    result is returned as-is.
    """
    match_config = config["match"]
    field_names: List[str] = match_config[cls.disambig_fields_key].as_str_seq()
    return field_names
692
+
682
693
@property
683
694
def dist (self ) -> str :
684
695
if self .distance <= config ["match" ]["strong_rec_thresh" ].as_number ():
@@ -693,22 +704,23 @@ def dist(self) -> str:
693
704
def name (self ) -> str :
694
705
return self .info .name
695
706
696
@cached_property
def penalty(self, limit: int = 0) -> Optional[str]:
    """Return a colorized string listing the penalties applied to
    this match's distance, or ``None`` when there are none.

    Before the penalties are read, a ``missing_fields`` penalty is
    added to ``self.distance``: the fraction of ``self.disambig_fields``
    whose value in ``self.info`` is missing or falsy. When no
    disambiguation fields are configured, that penalty is skipped
    instead of dividing by zero.

    This is a ``cached_property``, so the getter is called with the
    instance only: ``limit`` always keeps its default of 0 (no
    truncation), and the body, including the ``distance.add`` side
    effect, runs once per instance.
    """
    fields = list(self.disambig_fields)
    field_count = len(fields)
    if field_count:
        missing_field_count = sum(1 for f in fields if not self.info.get(f))
        self.distance.add("missing_fields", missing_field_count / field_count)

    penalties = self.distance.penalties
    if not penalties:
        return None

    if limit and len(penalties) > limit:
        penalties = penalties[:limit] + ["..."]
    return colorize("text_warning", f"({', '.join(penalties)})")
707
723
708
- @property
709
- def disambig_fields (self ) -> Iterable [str ]:
710
- return config ["match" ][self .disambig_fields_key ].as_str_seq ()
711
-
712
724
@property
713
725
def dist_data (self ) -> JSONDict :
714
726
return {
0 commit comments