13
13
14
14
15
15
class CachingWikidataRelationSource (AbstractRelationSource ):
16
-
17
16
DEFAULT_CHUNK_SIZE = 500
18
17
19
18
def __init__ (self , linkings : EntityLinkings , wikidata_endpoint : WikidataEndpoint ):
@@ -84,7 +83,8 @@ def _decrease_chunk_size(self) -> None:
84
83
__succeeded_requests : int = 0
85
84
__cache_lock : threading .Lock = threading .Lock ()
86
85
__cached_relations : Optional [Dict [str , List [Relation ]]] = None
87
- __cache_modified : bool = False
86
+ __new_cached_relations : List [Relation ] = []
87
+ __caching_file_lock : threading .Lock = threading .Lock ()
88
88
__cache_file : Path = Path (os .path .dirname (os .path .abspath (__file__ )), ".." , ".cached_relations.csv" )
89
89
90
90
def chunk_size (self ) -> int :
@@ -97,15 +97,21 @@ def _synchronized_chunk_size() -> int:
97
97
98
98
@staticmethod
def _retrieve_relations_from_cache(entities: List[str]) -> Iterable[Relation]:
    """Yield every cached relation whose source is one of ``entities``.

    Entities that have no entry in the cache are skipped silently.

    The matching relation lists are copied while ``__cache_lock`` is held and
    are yielded only after the lock has been released. This keeps two
    properties at once:

    * the shared cache dictionary is never read without the lock (a lazy
      ``filter``/generator expression would defer the reads until iteration,
      i.e. until after the ``with`` block has exited), and
    * the non-reentrant lock is never held while this generator is suspended,
      so a consumer may touch the cache between two yielded relations.

    :param entities: identifiers to look up in the cache.
    :return: an iterator over the cached relations of the given entities, in
        the order of ``entities``.
    """
    with CachingWikidataRelationSource.__cache_lock:
        CachingWikidataRelationSource.__initialize_cache()

        cache = CachingWikidataRelationSource.__cached_relations
        # Copy each list so later appends to the cache cannot change what is
        # being iterated below.
        snapshot: List[List[Relation]] = [list(cache[entity]) for entity in entities if entity in cache]

    # An empty snapshot simply ends the generator; raising StopIteration by
    # hand inside a generator is turned into a RuntimeError (PEP 479).
    for relations_of_entity in snapshot:
        yield from relations_of_entity
109
115
110
116
@staticmethod
111
117
def _add_to_cache (relations : Iterable [Relation ]) -> None :
@@ -117,7 +123,9 @@ def _add_to_cache(relations: Iterable[Relation]) -> None:
117
123
CachingWikidataRelationSource .__cached_relations [relation .source ] = []
118
124
119
125
CachingWikidataRelationSource .__cached_relations [relation .source ].append (relation )
120
- CachingWikidataRelationSource .__cache_modified = True
126
+ CachingWikidataRelationSource .__new_cached_relations .append (relation )
127
+
128
+ CachingWikidataRelationSource ._save_cached_relations ()
121
129
122
130
@staticmethod
123
131
def __initialize_cache () -> None :
@@ -152,16 +160,19 @@ def __initialize_cache() -> None:
152
160
153
161
@staticmethod
def _save_cached_relations() -> None:
    """Append all not-yet-persisted relations to the cache file.

    The pending relations are moved out of ``__new_cached_relations`` while
    ``__cache_lock`` is held; the file itself is written under the separate
    ``__caching_file_lock`` so that slow disk I/O does not block cache reads.

    The CSV header ``source,name,target`` is written first whenever the cache
    file does not exist yet or exists but is still empty.

    Must not be called while the calling thread already holds
    ``__cache_lock``: it is a plain ``threading.Lock`` and is acquired here.

    NOTE(review): fields are written verbatim, without CSV quoting - this
    presumably relies on the values never containing commas or newlines;
    verify against the reader in ``__initialize_cache``.
    """
    with CachingWikidataRelationSource.__cache_lock:
        new_relations: List[Relation] = list(CachingWikidataRelationSource.__new_cached_relations)
        CachingWikidataRelationSource.__new_cached_relations.clear()

    if not new_relations:
        return

    with CachingWikidataRelationSource.__caching_file_lock:
        cache_file: Path = CachingWikidataRelationSource.__cache_file
        needs_header: bool = not cache_file.exists() or cache_file.stat().st_size == 0

        # Append mode creates the file when it is missing, so the header and
        # the rows can share a single open/close.
        with cache_file.open("a") as output_stream:
            if needs_header:
                print("source,name,target", file=output_stream)

            for relation in new_relations:
                print(f"{relation.source},{relation.name},{relation.target}", file=output_stream)
0 commit comments