Skip to content

Commit 22f1eb7

Browse files
authored
Revert "[amazon] fixes for DE (#86)"
This reverts commit b2867f0.
1 parent b2867f0 commit 22f1eb7

File tree

1 file changed

+40
-54
lines changed

1 file changed

+40
-54
lines changed

finance_dl/amazon.py

+40-54
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,6 @@ class Domain():
108108

109109
# Find invoices.
110110
your_orders: str
111-
archived_orders: str
112111
invoice: str
113112
invoice_link: List[str]
114113
order_summary: str
@@ -140,7 +139,6 @@ def __init__(self) -> None:
140139
sign_out='Sign Out',
141140

142141
your_orders='Your Orders',
143-
archived_orders='Archived Orders',
144142
invoice='Invoice',
145143
invoice_link=["View order", "View invoice"],
146144
# View invoice -> regular/digital order, View order -> Amazon Fresh
@@ -173,7 +171,6 @@ def __init__(self) -> None:
173171
sign_out='Sign out',
174172

175173
your_orders='Your Orders',
176-
archived_orders='Archived Orders',
177174
invoice='Invoice',
178175
invoice_link=["View order", "View invoice"],
179176
# View invoice -> regular/digital order, View order -> Amazon Fresh
@@ -205,7 +202,6 @@ def __init__(self) -> None:
205202
sign_out='Abmelden',
206203

207204
your_orders='Meine Bestellungen',
208-
archived_orders='Archivierte Bestellungen',
209205
invoice='Rechnung',
210206
invoice_link=["Bestelldetails anzeigen"],
211207
fresh_fallback=None,
@@ -329,7 +325,7 @@ def get_invoice_path(self, year, order_id):
329325
return os.path.join(self.output_directory, order_id + '.html')
330326

331327
def get_order_id(self, href) -> str:
332-
m = re.match('.*[&?]orderI[Dd]=((?:D)?[0-9\\-]+)(?:&.*)?$', href)
328+
m = re.match('.*[&?]orderID=((?:D)?[0-9\\-]+)(?:&.*)?$', href)
333329
if m is None:
334330
raise RuntimeError(
335331
'Failed to parse order ID from href %r' % (href, ))
@@ -363,30 +359,34 @@ def get_invoice_urls():
363359
# order summary is hidden behind submenu which requires a click to be visible
364360

365361
def invoice_finder():
366-
# order summary link is visible on page
367-
elements_raw = self.driver.find_elements(
368-
By.XPATH, '//a[contains(@href, "orderID=")]')
369-
elements = []
370-
for invoice_link in elements_raw:
371-
if invoice_link.text not in self.domain.invoice_link:
372-
# skip invoice if label is not known
373-
# different labels are possible e.g. for regular orders vs. Amazon fresh
374-
if invoice_link.text != "":
375-
# log non-empty link texts -> may be new type
376-
logger.debug(
377-
'Skipping invoice due to unknown invoice_link.text: %s',
378-
invoice_link.text)
379-
else:
380-
elements.append(invoice_link)
381-
return elements
382-
362+
if not self.domain.order_summary_hidden:
363+
# order summary link is visible on page
364+
return self.driver.find_elements(
365+
By.XPATH, '//a[contains(@href, "orderID=")]')
366+
else:
367+
# order summary link is hidden in submenu for each order
368+
elements = self.driver.find_elements(By.XPATH,
369+
'//a[@class="a-popover-trigger a-declarative"]')
370+
return [a for a in elements if a.text == self.domain.invoice]
371+
383372
if initial_iteration:
384373
invoices = invoice_finder()
385374
else:
386375
invoices, = self.wait_and_return(invoice_finder)
387376
initial_iteration = False
388377

378+
last_order_id = None
379+
389380
def invoice_link_finder(invoice_link):
381+
if invoice_link.text not in self.domain.invoice_link:
382+
# skip invoice if label is not known
383+
# different labels are possible e.g. for regular orders vs. Amazon fresh
384+
if invoice_link.text != "":
385+
# log non-empty link texts -> may be new type
386+
logger.debug(
387+
'Skipping invoice due to unknown invoice_link.text: %s',
388+
invoice_link.text)
389+
return (False, False)
390390
href = invoice_link.get_attribute('href')
391391
order_id = self.get_order_id(href)
392392
if self.domain.fresh_fallback is not None and invoice_link.text == self.domain.fresh_fallback:
@@ -397,39 +397,26 @@ def invoice_link_finder(invoice_link):
397397
tokens[-1] = f"gp/css/summary/print.html?orderID={order_id}"
398398
href = "/".join(tokens)
399399
return (order_id, href)
400-
401-
def invoice_link_finder_hidden(invoice_link):
402-
# get order id to later find the correct summary link
403-
order_id=self.get_order_id(invoice_link.get_attribute('href'))
404-
405-
# get parent element to search for invoice menu button (has no orderID specified)
406-
parent=invoice_link.find_element(By.XPATH,"./..")
407-
# leading dot in './/' specifies to only search in children
408-
popover=parent.find_elements(By.XPATH,'.//a[contains(@href, "invoice/invoice.html")]')
409-
# depending on the order group the XPATH may be different
410-
if len(popover) == 0:
411-
popover=parent.find_elements(
412-
By.XPATH,
413-
f'.//a[contains(text(), {self.domain.invoice}) and @class="a-popover-trigger a-declarative"]')
414-
415-
# open invoice popover to extract invoice link
416-
popover[0].click()
417-
418-
# submenu containing order summary takes some time to load after click
419-
summary_link, = self.wait_and_locate(
420-
(By.XPATH,'//a[contains(@href,"{}") and contains(text(),"{}")]'.format(order_id, self.domain.order_summary)))
421-
if summary_link:
422-
href = summary_link.get_attribute('href')
423-
return (order_id, href)
424-
else:
425-
logger.info('Link extraction failed for order id: %r', order_id)
426-
return (False, False)
400+
401+
def invoice_link_finder_hidden():
402+
# submenu containing order summary takes some time to load after click
403+
# search for order summary link and compare order_id
404+
# repeat until order_id is different to last order_id
405+
summary_links = self.driver.find_elements(By.LINK_TEXT,
406+
self.domain.order_summary)
407+
if summary_links:
408+
href = summary_links[0].get_attribute('href')
409+
order_id = self.get_order_id(href)
410+
if order_id != last_order_id:
411+
return (order_id, href)
412+
return False
427413

428414
for invoice_link in invoices:
429415
if not self.domain.order_summary_hidden:
430416
(order_id, href) = invoice_link_finder(invoice_link)
431417
else:
432-
(order_id, href) = invoice_link_finder_hidden(invoice_link)
418+
invoice_link.click()
419+
(order_id, href), = self.wait_and_return(invoice_link_finder_hidden)
433420
if order_id:
434421
if order_id in order_ids_seen:
435422
logger.info('Skipping already-seen order id: %r', order_id)
@@ -440,6 +427,7 @@ def invoice_link_finder_hidden(invoice_link):
440427
logger.info('Found order \'{}\''.format(order_id))
441428
invoice_hrefs.append((href, order_id))
442429
order_ids_seen.add(order_id)
430+
last_order_id = order_id
443431

444432
# Find next link
445433
next_links = self.find_elements_by_descendant_text_match(
@@ -457,9 +445,7 @@ def retrieve_all_order_groups():
457445
order_select_index = 0
458446

459447
while True:
460-
(order_filter,), = self.wait_and_return(
461-
lambda: self.find_visible_elements(By.XPATH, '//select[@name="timeFilter"]')
462-
)
448+
order_filter, = self.wait_and_locate((By.CSS_SELECTOR, '#time-filter, #orderFilter'))
463449
order_select = Select(order_filter)
464450
num_options = len(order_select.options)
465451
if order_select_index >= num_options:
@@ -468,7 +454,7 @@ def retrieve_all_order_groups():
468454
order_select_index]
469455
option_text = option.text.strip()
470456
order_select_index += 1
471-
if option_text == self.domain.archived_orders:
457+
if option_text == 'Archived Orders':
472458
continue
473459
if self.order_groups is not None and option_text not in self.order_groups:
474460
logger.info('Skipping order group: %r', option_text)

0 commit comments

Comments
 (0)