@@ -108,7 +108,6 @@ class Domain():
108
108
109
109
# Find invoices.
110
110
your_orders : str
111
- archived_orders : str
112
111
invoice : str
113
112
invoice_link : List [str ]
114
113
order_summary : str
@@ -140,7 +139,6 @@ def __init__(self) -> None:
140
139
sign_out = 'Sign Out' ,
141
140
142
141
your_orders = 'Your Orders' ,
143
- archived_orders = 'Archived Orders' ,
144
142
invoice = 'Invoice' ,
145
143
invoice_link = ["View order" , "View invoice" ],
146
144
# View invoice -> regular/digital order, View order -> Amazon Fresh
@@ -173,7 +171,6 @@ def __init__(self) -> None:
173
171
sign_out = 'Sign out' ,
174
172
175
173
your_orders = 'Your Orders' ,
176
- archived_orders = 'Archived Orders' ,
177
174
invoice = 'Invoice' ,
178
175
invoice_link = ["View order" , "View invoice" ],
179
176
# View invoice -> regular/digital order, View order -> Amazon Fresh
@@ -205,7 +202,6 @@ def __init__(self) -> None:
205
202
sign_out = 'Abmelden' ,
206
203
207
204
your_orders = 'Meine Bestellungen' ,
208
- archived_orders = 'Archivierte Bestellungen' ,
209
205
invoice = 'Rechnung' ,
210
206
invoice_link = ["Bestelldetails anzeigen" ],
211
207
fresh_fallback = None ,
@@ -329,7 +325,7 @@ def get_invoice_path(self, year, order_id):
329
325
return os .path .join (self .output_directory , order_id + '.html' )
330
326
331
327
def get_order_id (self , href ) -> str :
332
- m = re .match ('.*[&?]orderI[Dd] =((?:D)?[0-9\\ -]+)(?:&.*)?$' , href )
328
+ m = re .match ('.*[&?]orderID =((?:D)?[0-9\\ -]+)(?:&.*)?$' , href )
333
329
if m is None :
334
330
raise RuntimeError (
335
331
'Failed to parse order ID from href %r' % (href , ))
@@ -363,30 +359,34 @@ def get_invoice_urls():
363
359
# order summary is hidden behind submenu which requires a click to be visible
364
360
365
361
def invoice_finder ():
366
- # order summary link is visible on page
367
- elements_raw = self .driver .find_elements (
368
- By .XPATH , '//a[contains(@href, "orderID=")]' )
369
- elements = []
370
- for invoice_link in elements_raw :
371
- if invoice_link .text not in self .domain .invoice_link :
372
- # skip invoice if label is not known
373
- # different labels are possible e.g. for regular orders vs. Amazon fresh
374
- if invoice_link .text != "" :
375
- # log non-empty link texts -> may be new type
376
- logger .debug (
377
- 'Skipping invoice due to unknown invoice_link.text: %s' ,
378
- invoice_link .text )
379
- else :
380
- elements .append (invoice_link )
381
- return elements
382
-
362
+ if not self .domain .order_summary_hidden :
363
+ # order summary link is visible on page
364
+ return self .driver .find_elements (
365
+ By .XPATH , '//a[contains(@href, "orderID=")]' )
366
+ else :
367
+ # order summary link is hidden in submenu for each order
368
+ elements = self .driver .find_elements (By .XPATH ,
369
+ '//a[@class="a-popover-trigger a-declarative"]' )
370
+ return [a for a in elements if a .text == self .domain .invoice ]
371
+
383
372
if initial_iteration :
384
373
invoices = invoice_finder ()
385
374
else :
386
375
invoices , = self .wait_and_return (invoice_finder )
387
376
initial_iteration = False
388
377
378
+ last_order_id = None
379
+
389
380
def invoice_link_finder (invoice_link ):
381
+ if invoice_link .text not in self .domain .invoice_link :
382
+ # skip invoice if label is not known
383
+ # different labels are possible e.g. for regular orders vs. Amazon fresh
384
+ if invoice_link .text != "" :
385
+ # log non-empty link texts -> may be new type
386
+ logger .debug (
387
+ 'Skipping invoice due to unknown invoice_link.text: %s' ,
388
+ invoice_link .text )
389
+ return (False , False )
390
390
href = invoice_link .get_attribute ('href' )
391
391
order_id = self .get_order_id (href )
392
392
if self .domain .fresh_fallback is not None and invoice_link .text == self .domain .fresh_fallback :
@@ -397,39 +397,26 @@ def invoice_link_finder(invoice_link):
397
397
tokens [- 1 ] = f"gp/css/summary/print.html?orderID={ order_id } "
398
398
href = "/" .join (tokens )
399
399
return (order_id , href )
400
-
401
- def invoice_link_finder_hidden (invoice_link ):
402
- # get order id to later find the correct summary link
403
- order_id = self .get_order_id (invoice_link .get_attribute ('href' ))
404
-
405
- # get parent element to search for invoice menu button (has no orderID specified)
406
- parent = invoice_link .find_element (By .XPATH ,"./.." )
407
- # leading dot in './/' specifies to only search in children
408
- popover = parent .find_elements (By .XPATH ,'.//a[contains(@href, "invoice/invoice.html")]' )
409
- # depending on the order group the XPATH may be different
410
- if len (popover ) == 0 :
411
- popover = parent .find_elements (
412
- By .XPATH ,
413
- f'.//a[contains(text(), { self .domain .invoice } ) and @class="a-popover-trigger a-declarative"]' )
414
-
415
- # open invoice popover to extract invoice link
416
- popover [0 ].click ()
417
-
418
- # submenu containing order summary takes some time to load after click
419
- summary_link , = self .wait_and_locate (
420
- (By .XPATH ,'//a[contains(@href,"{}") and contains(text(),"{}")]' .format (order_id , self .domain .order_summary )))
421
- if summary_link :
422
- href = summary_link .get_attribute ('href' )
423
- return (order_id , href )
424
- else :
425
- logger .info ('Link extraction failed for order id: %r' , order_id )
426
- return (False , False )
400
+
401
+ def invoice_link_finder_hidden ():
402
+ # submenu containing order summary takes some time to load after click
403
+ # search for order summary link and compare order_id
404
+ # repeat until order_id is different to last order_id
405
+ summary_links = self .driver .find_elements (By .LINK_TEXT ,
406
+ self .domain .order_summary )
407
+ if summary_links :
408
+ href = summary_links [0 ].get_attribute ('href' )
409
+ order_id = self .get_order_id (href )
410
+ if order_id != last_order_id :
411
+ return (order_id , href )
412
+ return False
427
413
428
414
for invoice_link in invoices :
429
415
if not self .domain .order_summary_hidden :
430
416
(order_id , href ) = invoice_link_finder (invoice_link )
431
417
else :
432
- (order_id , href ) = invoice_link_finder_hidden (invoice_link )
418
+ invoice_link .click ()
419
+ (order_id , href ), = self .wait_and_return (invoice_link_finder_hidden )
433
420
if order_id :
434
421
if order_id in order_ids_seen :
435
422
logger .info ('Skipping already-seen order id: %r' , order_id )
@@ -440,6 +427,7 @@ def invoice_link_finder_hidden(invoice_link):
440
427
logger .info ('Found order \' {}\' ' .format (order_id ))
441
428
invoice_hrefs .append ((href , order_id ))
442
429
order_ids_seen .add (order_id )
430
+ last_order_id = order_id
443
431
444
432
# Find next link
445
433
next_links = self .find_elements_by_descendant_text_match (
@@ -457,9 +445,7 @@ def retrieve_all_order_groups():
457
445
order_select_index = 0
458
446
459
447
while True :
460
- (order_filter ,), = self .wait_and_return (
461
- lambda : self .find_visible_elements (By .XPATH , '//select[@name="timeFilter"]' )
462
- )
448
+ order_filter , = self .wait_and_locate ((By .CSS_SELECTOR , '#time-filter, #orderFilter' ))
463
449
order_select = Select (order_filter )
464
450
num_options = len (order_select .options )
465
451
if order_select_index >= num_options :
@@ -468,7 +454,7 @@ def retrieve_all_order_groups():
468
454
order_select_index ]
469
455
option_text = option .text .strip ()
470
456
order_select_index += 1
471
- if option_text == self . domain . archived_orders :
457
+ if option_text == 'Archived Orders' :
472
458
continue
473
459
if self .order_groups is not None and option_text not in self .order_groups :
474
460
logger .info ('Skipping order group: %r' , option_text )
0 commit comments