Skip to content

Commit 7ca5f57

Browse files
author
Johannes Schneider
committed
relax cache locking and implement regular cache backups
1 parent a963f28 commit 7ca5f57

File tree

3 files changed

+27
-23
lines changed

3 files changed

+27
-23
lines changed

ClusterAnnotater/cluster_annotator.py

-3
Original file line number | Diff line number | Diff line change
@@ -54,9 +54,6 @@ def run(self) -> Iterable[RelationMetrics]:
5454
for worker in self._workers:
5555
worker.join()
5656

57-
for relation_source in self._relation_sources:
58-
relation_source.shutdown()
59-
6057
return self._collect_results()
6158

6259
def _collect_results(self) -> Iterable[RelationMetrics]:

RelationSource/abstract_relation_source.py

-4
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,3 @@ def _retrieve_relations_for(self, entities) -> List[Relation]:
1717
@abstractmethod
1818
def chunk_size(self) -> int:
1919
raise NotImplementedError
20-
21-
@abstractmethod
22-
def shutdown(self) -> None:
23-
raise NotImplementedError

RelationSource/caching_wikidata_relation_source.py

+27 -16
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313

1414

1515
class CachingWikidataRelationSource(AbstractRelationSource):
16-
1716
DEFAULT_CHUNK_SIZE = 500
1817

1918
def __init__(self, linkings: EntityLinkings, wikidata_endpoint: WikidataEndpoint):
@@ -84,7 +83,8 @@ def _decrease_chunk_size(self) -> None:
8483
__succeeded_requests: int = 0
8584
__cache_lock: threading.Lock = threading.Lock()
8685
__cached_relations: Optional[Dict[str, List[Relation]]] = None
87-
__cache_modified: bool = False
86+
__new_cached_relations: List[Relation] = []
87+
__caching_file_lock: threading.Lock = threading.Lock()
8888
__cache_file: Path = Path(os.path.dirname(os.path.abspath(__file__)), "..", ".cached_relations.csv")
8989

9090
def chunk_size(self) -> int:
@@ -97,15 +97,21 @@ def _synchronized_chunk_size() -> int:
9797

9898
@staticmethod
9999
def _retrieve_relations_from_cache(entities: List[str]) -> Iterable[Relation]:
100+
cached_relations: Optional[Generator[List[Relation]]] = None
101+
100102
with CachingWikidataRelationSource.__cache_lock:
101103
CachingWikidataRelationSource.__initialize_cache()
102104

103105
cached_entities: Iterator[str] = filter(lambda x: x in CachingWikidataRelationSource.__cached_relations,
104106
entities)
105-
cached_relations: Generator[List[Relation]] = (CachingWikidataRelationSource.__cached_relations[entity] for
106-
entity in cached_entities)
107-
for list_of_relations in cached_relations:
108-
yield from list_of_relations
107+
cached_relations = (CachingWikidataRelationSource.__cached_relations[entity] for
108+
entity in cached_entities)
109+
110+
if cached_relations is None:
111+
raise StopIteration
112+
113+
for list_of_relations in cached_relations:
114+
yield from list_of_relations
109115

110116
@staticmethod
111117
def _add_to_cache(relations: Iterable[Relation]) -> None:
@@ -117,7 +123,9 @@ def _add_to_cache(relations: Iterable[Relation]) -> None:
117123
CachingWikidataRelationSource.__cached_relations[relation.source] = []
118124

119125
CachingWikidataRelationSource.__cached_relations[relation.source].append(relation)
120-
CachingWikidataRelationSource.__cache_modified = True
126+
CachingWikidataRelationSource.__new_cached_relations.append(relation)
127+
128+
CachingWikidataRelationSource._save_cached_relations()
121129

122130
@staticmethod
123131
def __initialize_cache() -> None:
@@ -152,16 +160,19 @@ def __initialize_cache() -> None:
152160

153161
@staticmethod
154162
def _save_cached_relations() -> None:
163+
new_relations: List[Relation] = []
155164
with CachingWikidataRelationSource.__cache_lock:
156-
if not CachingWikidataRelationSource.__cache_modified:
157-
return
165+
new_relations.extend(CachingWikidataRelationSource.__new_cached_relations)
166+
CachingWikidataRelationSource.__new_cached_relations.clear()
158167

159-
with CachingWikidataRelationSource.__cache_file.open("w+") as output_stream:
160-
print("source,name,target", file=output_stream)
168+
if len(new_relations) < 1:
169+
return
161170

162-
for relation_source, relations in CachingWikidataRelationSource.__cached_relations.items():
163-
for relation in relations:
164-
print(f"{relation.source},{relation.name},{relation.target}", file=output_stream)
171+
with CachingWikidataRelationSource.__caching_file_lock:
172+
if not CachingWikidataRelationSource.__cache_file.exists():
173+
with CachingWikidataRelationSource.__cache_file.open("w+") as output_stream:
174+
print("source,name,target", file=output_stream)
165175

166-
def shutdown(self) -> None:
167-
CachingWikidataRelationSource._save_cached_relations()
176+
with CachingWikidataRelationSource.__cache_file.open("a") as output_stream:
177+
for relation in new_relations:
178+
print(f"{relation.source},{relation.name},{relation.target}", file=output_stream)

0 commit comments

Comments
 (0)