@@ -204,15 +204,15 @@ def build_query(self, project: str, chunk_start: datetime, chunk_end: datetime,
        Returns:
            A formatted query string.
        """
-        return f'project:{project} buildStartTime:[{chunk_start.isoformat()} TO {chunk_end.isoformat()}] gradle.requestedTasks:{test_type}'
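+        # Scan tags are ANDed with the other query terms, restricting results to trunk builds from GitHub CI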
+        return f'project:{project} buildStartTime:[{chunk_start.isoformat()} TO {chunk_end.isoformat()}] gradle.requestedTasks:{test_type} tag:github tag:trunk'

    def process_chunk(
        self,
        chunk_start: datetime,
        chunk_end: datetime,
        project: str,
        test_type: str,
-        remaining_build_ids: set,
+        remaining_build_ids: set | None,
        max_builds_per_request: int
    ) -> Dict[str, BuildInfo]:
        """Helper method to process a single chunk of build information"""
@@ -225,7 +225,7 @@ def process_chunk(
        from_build = None
        continue_chunk = True

-        while continue_chunk and remaining_build_ids:
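+        # remaining_build_ids is None in fetch_all mode; in that case, paginate until the chunk is exhausted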
+        while continue_chunk and (remaining_build_ids is None or remaining_build_ids):
            query_params = {
                'query': query,
                'models': ['gradle-attributes'],
@@ -273,14 +273,16 @@ def process_chunk(
                    continue_chunk = False
                    break

-                if build_id in remaining_build_ids:
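+                # With fetch_all there is no target set, so every build matches; otherwise keep only requested IDs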
+                if remaining_build_ids is None or build_id in remaining_build_ids:
                    if 'problem' not in gradle_attrs:
                        chunk_builds[build_id] = BuildInfo(
                            id=build_id,
                            timestamp=build_timestamp,
                            duration=attrs.get('buildDuration'),
                            has_failed=attrs.get('hasFailed', False)
                        )
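+                    # Remove matched IDs so the pagination loop can stop early once all targets are found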
+                    if remaining_build_ids is not None:
+                        remaining_build_ids.remove(build_id)

            if continue_chunk and response_json:
                from_build = response_json[-1]['id']
@@ -290,38 +292,47 @@ def get_build_info(self, build_ids: List[str], project: str, test_type: str, query_days: int) -> Dict[str, BuildInfo]:
            time.sleep(0.5)  # Rate limiting between pagination requests

        return chunk_builds
-
-    def get_build_info(self, build_ids: List[str], project: str, test_type: str, query_days: int) -> Dict[str, BuildInfo]:
+    def get_build_info(self, build_ids: List[str] = None, project: str = None, test_type: str = None, query_days: int = None, bypass_cache: bool = False, fetch_all: bool = False) -> Dict[str, BuildInfo]:
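+        # bypass_cache=True skips the cache read below and the cache write at the end;
+        # fetch_all=True ignores build_ids and returns every build in the query window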
        builds = {}
        max_builds_per_request = 100
        cutoff_date = datetime.now(pytz.UTC) - timedelta(days=query_days)
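+        # Capture the run timestamp once; the cache update at the end reuses it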
+        current_time = datetime.now(pytz.UTC)

-        # Get builds from cache if available
-        if self.build_cache:
+        if not fetch_all and not build_ids:
+            raise ValueError("Either build_ids must be provided or fetch_all must be True")
+
+        # Get builds from cache if available and bypass_cache is False
+        if not bypass_cache and self.build_cache:
            cached_builds = self.build_cache.builds
            cached_cutoff = self.build_cache.last_update - timedelta(days=query_days)

-            # Use cached data for builds within the cache period
-            for build_id in build_ids:
-                if build_id in cached_builds:
-                    build = cached_builds[build_id]
+            if fetch_all:
+                # Use all cached builds within the time period
+                for build_id, build in cached_builds.items():
                    if build.timestamp >= cached_cutoff:
                        builds[build_id] = build
+            else:
+                # Use cached data for specific builds within the cache period
+                for build_id in build_ids:
+                    if build_id in cached_builds:
+                        build = cached_builds[build_id]
+                        if build.timestamp >= cached_cutoff:
+                            builds[build_id] = build

            # Update cutoff date to only fetch new data
            cutoff_date = self.build_cache.last_update
            logger.info(f"Using cached data up to {cutoff_date.isoformat()}")

-            # Remove already found builds from the search list
-            build_ids = [bid for bid in build_ids if bid not in builds]
-
-            if not build_ids:
-                logger.info("All builds found in cache")
-                return builds
+            if not fetch_all:
+                # Remove already found builds from the search list
+                build_ids = [bid for bid in build_ids if bid not in builds]
+
+                if not build_ids:
+                    logger.info("All builds found in cache")
+                    return builds

        # Fetch remaining builds from API
-        remaining_build_ids = set(build_ids)
-        current_time = datetime.now(pytz.UTC)
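+        # In fetch_all mode there is no target set; process_chunk collects every matching build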
+        remaining_build_ids = set(build_ids) if not fetch_all else None
        chunk_size = self.default_chunk_size

        # Create time chunks
@@ -343,7 +354,7 @@ def get_build_info(self, build_ids: List[str], project: str, test_type: str, query_days: int) -> Dict[str, BuildInfo]:
                    chunk[1],
                    project,
                    test_type,
-                    remaining_build_ids.copy(),
+                    remaining_build_ids.copy() if remaining_build_ids else None,
                    max_builds_per_request
                ): chunk for chunk in chunks
            }
@@ -352,7 +363,8 @@ def get_build_info(self, build_ids: List[str], project: str, test_type: str, query_days: int) -> Dict[str, BuildInfo]:
                try:
                    chunk_builds = future.result()
                    builds.update(chunk_builds)
-                    remaining_build_ids -= set(chunk_builds.keys())
+                    if remaining_build_ids:
+                        remaining_build_ids -= set(chunk_builds.keys())
                except Exception as e:
                    logger.error(f"Chunk processing generated an exception: {str(e)}")

@@ -361,19 +373,18 @@ def get_build_info(self, build_ids: List[str], project: str, test_type: str, query_days: int) -> Dict[str, BuildInfo]:
            f"\n Build Info Performance:"
            f"\n Total Duration: {total_duration:.2f}s"
            f"\n Builds Retrieved: {len(builds)}"
-            f"\n Builds Not Found: {len(remaining_build_ids)}"
+            f"\n Builds Not Found: {len(remaining_build_ids) if remaining_build_ids else 0}"
        )

-        # Update cache with new data
-        if builds:
+        # Update cache with new data if not bypassing cache
+        if builds and not bypass_cache:
            if not self.build_cache:
                self.build_cache = BuildCache(current_time, {})
            self.build_cache.builds.update(builds)
            self.build_cache.last_update = current_time
            self._save_cache()

        return builds
-
    def get_test_results(self, project: str, threshold_days: int, test_type: str = "quarantinedTest",
                         outcomes: List[str] = None) -> List[TestResult]:
        """Fetch test results with timeline information"""
@@ -464,6 +475,11 @@ def get_test_results(self, project: str, threshold_days: int, test_type: str = "quarantinedTest",
                # Sort timeline by timestamp
                result.timeline = sorted(timeline, key=lambda x: x.timestamp)
                logger.debug(f"Final timeline entries for {test_name}: {len(result.timeline)}")
+
+                # Print build details for debugging
+                logger.debug("Timeline entries:")
+                for entry in timeline:
+                    logger.debug(f"Build ID: {entry.build_id}, Timestamp: {entry.timestamp}, Outcome: {entry.outcome}")

                # Calculate recent failure rate
                recent_cutoff = datetime.now(pytz.UTC) - timedelta(days=30)
@@ -768,32 +784,60 @@ def get_cleared_tests(self, project: str, results: List[TestResult],

        return cleared_tests

-def get_develocity_class_link(class_name: str, threshold_days: int) -> str:
+    def update_cache(self, builds: Dict[str, BuildInfo]):
+        """
+        Update the build cache with new build information.
+
+        Args:
+            builds: Dictionary of build IDs to BuildInfo objects
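+
+        Example (illustrative; assumes an existing TestAnalyzer instance named `analyzer`):
+            builds = analyzer.get_build_info(project="kafka", test_type="test", query_days=7,
+                                             bypass_cache=True, fetch_all=True)
+            analyzer.update_cache(builds)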
+ """
794
+ current_time = datetime .now (pytz .UTC )
795
+
796
+ # Initialize cache if it doesn't exist
797
+ if not self .build_cache :
798
+ self .build_cache = BuildCache (current_time , {})
799
+
800
+ # Update builds and last update time
801
+ self .build_cache .builds .update (builds )
802
+ self .build_cache .last_update = current_time
803
+
804
+ # Save to all cache providers
805
+ self ._save_cache ()
806
+
807
+ logger .info (f"Updated cache with { len (builds )} builds" )
808
+
809
+ def get_develocity_class_link (class_name : str , threshold_days : int , test_type : str = None ) -> str :
772
810
"""
773
811
Generate Develocity link for a test class
774
812
775
813
Args:
776
814
class_name: Name of the test class
777
815
threshold_days: Number of days to look back in search
816
+ test_type: Type of test (e.g., "quarantinedTest", "test")
778
817
"""
779
818
base_url = "https://ge.apache.org/scans/tests"
780
819
params = {
781
820
"search.rootProjectNames" : "kafka" ,
782
821
"search.tags" : "github,trunk" ,
783
- "search.timeZoneId" : "America/New_York " ,
822
+ "search.timeZoneId" : "UTC " ,
784
823
"search.relativeStartTime" : f"P{ threshold_days } D" ,
785
824
"tests.container" : class_name
786
825
}
826
+
827
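+    # search.tasks narrows the linked search to scans of the given task, mirroring the gradle.requestedTasks filter used in the API query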
+    if test_type:
+        params["search.tasks"] = test_type
+
    return f"{base_url}?{'&'.join(f'{k}={requests.utils.quote(str(v))}' for k, v in params.items())}"

-def get_develocity_method_link(class_name: str, method_name: str, threshold_days: int) -> str:
+def get_develocity_method_link(class_name: str, method_name: str, threshold_days: int, test_type: str = None) -> str:
    """
    Generate Develocity link for a test method

    Args:
        class_name: Name of the test class
        method_name: Name of the test method
        threshold_days: Number of days to look back in search
+        test_type: Type of test (e.g., "quarantinedTest", "test")
    """
    base_url = "https://ge.apache.org/scans/tests"
@@ -804,14 +848,18 @@ def get_develocity_method_link(class_name: str, method_name: str, threshold_days: int) -> str:
    params = {
        "search.rootProjectNames": "kafka",
        "search.tags": "github,trunk",
-        "search.timeZoneId": "America/New_York",
+        "search.timeZoneId": "UTC",
        "search.relativeStartTime": f"P{threshold_days}D",
        "tests.container": class_name,
        "tests.test": method_name
    }
+
+    if test_type:
+        params["search.tasks"] = test_type
+
    return f"{base_url}?{'&'.join(f'{k}={requests.utils.quote(str(v))}' for k, v in params.items())}"

-def print_most_problematic_tests(problematic_tests: Dict[str, Dict], threshold_days: int):
+def print_most_problematic_tests(problematic_tests: Dict[str, Dict], threshold_days: int, test_type: str = None):
    """Print a summary of the most problematic tests"""
    print("\n## Most Problematic Tests")
    if not problematic_tests:
@@ -827,7 +875,7 @@ def print_most_problematic_tests(problematic_tests: Dict[str, Dict], threshold_days: int):
    for test_name, details in sorted(problematic_tests.items(),
                                     key=lambda x: x[1]['failure_rate'],
                                     reverse=True):
-        class_link = get_develocity_class_link(test_name, threshold_days)
+        class_link = get_develocity_class_link(test_name, threshold_days, test_type=test_type)
        print(f"<tr><td colspan=\"4\">{test_name}</td><td><a href=\"{class_link}\">↗️</a></td></tr>")

        for test_case in sorted(details['test_cases'],
@@ -836,7 +884,7 @@ def print_most_problematic_tests(problematic_tests: Dict[str, Dict], threshold_days: int):
                                reverse=True):
            method_name = test_case.name.split('.')[-1]
            if method_name != 'N/A':
-                method_link = get_develocity_method_link(test_name, test_case.name, threshold_days)
+                method_link = get_develocity_method_link(test_name, test_case.name, threshold_days, test_type=test_type)
                total_runs = test_case.outcome_distribution.total
                failure_rate = (test_case.outcome_distribution.failed + test_case.outcome_distribution.flaky) / total_runs if total_runs > 0 else 0
                print(f"<tr><td></td><td>{method_name}</td>"
@@ -925,7 +973,7 @@ def print_flaky_regressions(flaky_regressions: Dict[str, Dict], threshold_days: int):

    print("</details>")

-def print_cleared_tests(cleared_tests: Dict[str, Dict], threshold_days: int):
+def print_cleared_tests(cleared_tests: Dict[str, Dict], threshold_days: int, test_type: str = None):
    """Print tests that are ready to be unquarantined"""
    print("\n## Cleared Tests (Ready for Unquarantine)")
    if not cleared_tests:
@@ -945,7 +993,7 @@ def print_cleared_tests(cleared_tests: Dict[str, Dict], threshold_days: int):
    for test_name, details in sorted(cleared_tests.items(),
                                     key=lambda x: x[1]['success_rate'],
                                     reverse=True):
-        class_link = get_develocity_class_link(test_name, threshold_days)
+        class_link = get_develocity_class_link(test_name, threshold_days, test_type=test_type)
        print(f"<tr><td colspan=\"5\">{test_name}</td><td><a href=\"{class_link}\">↗️</a></td></tr>")
        print(f"<tr><td></td><td>Class Overall</td>"
              f"<td>{details['success_rate']:.2%}</td>"
@@ -1015,6 +1063,12 @@ def main():
    analyzer = TestAnalyzer(BASE_URL, token)

    try:
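+        # Warm the cache up front: fetch every build from the last 7 days for both
+        # task types, then persist them for the lookups that follow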
+        quarantined_builds = analyzer.get_build_info([], PROJECT, "quarantinedTest", 7, bypass_cache=True, fetch_all=True)
+        regular_builds = analyzer.get_build_info([], PROJECT, "test", 7, bypass_cache=True, fetch_all=True)
+
+        analyzer.update_cache(quarantined_builds)
+        analyzer.update_cache(regular_builds)
+
        # Get test results
        quarantined_results = analyzer.get_test_results(
            PROJECT,
@@ -1054,9 +1108,9 @@ def main():
        print(f"This report was run on {datetime.now(pytz.UTC).strftime('%Y-%m-%d %H:%M:%S')} UTC")

        # Print each section
-        print_most_problematic_tests(problematic_tests, QUARANTINE_THRESHOLD_DAYS)
+        print_most_problematic_tests(problematic_tests, QUARANTINE_THRESHOLD_DAYS, test_type="quarantinedTest")
        print_flaky_regressions(flaky_regressions, QUARANTINE_THRESHOLD_DAYS)
-        print_cleared_tests(cleared_tests, QUARANTINE_THRESHOLD_DAYS)
+        print_cleared_tests(cleared_tests, QUARANTINE_THRESHOLD_DAYS, test_type="quarantinedTest")

    except Exception as e:
        logger.exception("Error occurred during report generation")