From 4bacc5c74c244b25d9cba79a1fcee207553bb55e Mon Sep 17 00:00:00 2001
From: arition <aritionkb@gmail.com>
Date: Thu, 2 Nov 2023 12:43:59 -0400
Subject: [PATCH 1/5] fix csrf token fetching (#92)

---
 finance_dl/paypal.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/finance_dl/paypal.py b/finance_dl/paypal.py
index 31b120d..3148c38 100644
--- a/finance_dl/paypal.py
+++ b/finance_dl/paypal.py
@@ -175,8 +175,8 @@ def get_csrf_token(self):
         logging.info('Getting CSRF token')
         self.driver.get('https://www.paypal.com/myaccount/transactions/')
         # Get CSRF token
-        body_element, = self.wait_and_locate((By.ID, "__react_data__"))
-        attribute_object = json.loads(body_element.get_attribute("data"))
+        body_element, = self.wait_and_locate((By.ID, "__APP_DATA__"))
+        attribute_object = json.loads(body_element.get_attribute("innerHTML"))
         self.csrf_token = attribute_object["_csrf"]
         return self.csrf_token
 

From 196faef778cebb3ceb598fb9f22b0a494652e266 Mon Sep 17 00:00:00 2001
From: Eugeniu Plamadeala <eugeniu@plamadeala.com>
Date: Sun, 12 Nov 2023 22:20:36 -0800
Subject: [PATCH 2/5] venmo: Login fix, month at a time transaction downloads
 (#84)

* Catch up with Sign In page UI changes where the password field is
interactable only after the username is submitted.
Additionally, use a different mechanism to wait for page loading because
it was failing.

* venmo: retrieve transactions one month at a time (code by chandler150).
---
 finance_dl/venmo.py | 83 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 70 insertions(+), 13 deletions(-)

diff --git a/finance_dl/venmo.py b/finance_dl/venmo.py
index 6f576e5..bff76ea 100644
--- a/finance_dl/venmo.py
+++ b/finance_dl/venmo.py
@@ -82,7 +82,7 @@ def CONFIG_venmo():
 import os
 import time
 from selenium.webdriver.common.by import By
-from selenium.common.exceptions import NoSuchElementException
+from selenium.common.exceptions import NoSuchElementException, ElementNotInteractableException, StaleElementReferenceException
 from selenium.webdriver.support.ui import Select
 from selenium.webdriver.common.keys import Keys
 
@@ -146,19 +146,67 @@ def __init__(self, credentials, output_directory,
     def check_after_wait(self):
         check_url(self.driver.current_url)
 
+    def find_venmo_username(self):
+        """Return the text/email inputs of the first frame that has any; raise NoSuchElementException if none does."""
+        for frame in self.for_each_frame():  # presumably switches the driver into each frame in turn -- confirm in scrape_lib
+            inputs = self.driver.find_elements(By.XPATH, '//input[@type="text" or @type="email"]')
+            if inputs:  # find_elements returns [] instead of raising when nothing matches
+                return inputs
+        raise NoSuchElementException()
+
+    def find_venmo_password(self):
+        """Return the password inputs of the first frame that has any; raise NoSuchElementException if none does."""
+        for frame in self.for_each_frame():  # presumably switches the driver into each frame in turn -- confirm in scrape_lib
+            inputs = self.driver.find_elements(By.XPATH, '//input[@type="password"]')
+            if inputs:  # find_elements returns [] instead of raising when nothing matches
+                return inputs
+        raise NoSuchElementException()
+
+    def wait_for(self, condition_function):
+        """Poll condition_function every 0.1s for up to 3s; return True once it is truthy, else raise."""
+        deadline = time.time() + 3
+        while time.time() < deadline:
+            if condition_function():
+                return True
+            time.sleep(0.1)
+        # Reached only when the condition never became truthy inside the polling window.
+        message = 'Timeout waiting for {}'.format(condition_function.__name__)
+        raise Exception(message)
+
+    def click_through_to_new_page(self, button_text):  # Click a button, then block until the clicked element goes stale.
+        link = self.driver.find_element(By.XPATH, f'//button[@name="{button_text}"]')  # despite the parameter name, the match is on the button's name attribute
+        link.click()
+
+        def link_has_gone_stale():
+            try:
+                # Probe the clicked element with a throwaway lookup; only whether it raises matters.
+                link.find_elements(By.XPATH, 'doesnt-matter')
+                return False
+            except StaleElementReferenceException:
+                return True
+
+        self.wait_for(link_has_gone_stale)  # polls until the probe raises; wait_for raises Exception if 3 seconds pass first
+
     def login(self):
         if self.logged_in:
             return
         logger.info('Initiating log in')
         self.driver.get('https://venmo.com/account/sign-in')
 
-        (username, password), = self.wait_and_return(
-            self.find_username_and_password_in_any_frame)
-        logger.info('Entering username and password')
-        username.send_keys(self.credentials['username'])
+        #(username, password), = self.wait_and_return(
+        #    self.find_username_and_password_in_any_frame)
+        username = self.wait_and_return(self.find_venmo_username)[0][0]
+        try:
+            logger.info('Entering username')
+            username.send_keys(self.credentials['username'])
+            username.send_keys(Keys.ENTER)
+        except ElementNotInteractableException:
+            # indicates that username already filled in
+            logger.info("Skipped")
+        password = self.wait_and_return(self.find_venmo_password)[0][0]
+        logger.info('Entering password')
         password.send_keys(self.credentials['password'])
-        with self.wait_for_page_load():
-            password.send_keys(Keys.ENTER)
+        self.click_through_to_new_page("Sign in")
         logger.info('Logged in')
         self.logged_in = True
 
@@ -173,7 +221,7 @@ def goto_statement(self, start_date, end_date):
     def download_csv(self):
         logger.info('Looking for CSV link')
         download_button, = self.wait_and_locate(
-            (By.XPATH, '//a[text() = "Download CSV"]'))
+            (By.XPATH, '//*[text() = "Download CSV"]'))
         self.click(download_button)
         logger.info('Waiting for CSV download')
         download_result, = self.wait_and_return(self.get_downloaded_file)
@@ -182,8 +230,8 @@ def download_csv(self):
 
     def get_balance(self, balance_type):
         try:
-            balance_node = self.driver.find_element(
-                By.XPATH, '//*[@class="%s"]/child::*[@class="balance-amt"]' %
+            balance_node =  self.driver.find_element(
+                By.XPATH, '//*[text() = "%s"]/following-sibling::*' %
                 balance_type)
             return balance_node.text
         except NoSuchElementException:
@@ -191,9 +239,11 @@ def get_balance(self, balance_type):
 
     def get_balances(self):
         def maybe_get_balance():
-            start_balance = self.get_balance('start-balance')
-            end_balance = self.get_balance('end-balance')
+            start_balance = self.get_balance('Beginning amount')
+            end_balance = self.get_balance('Ending amount')
             if start_balance is not None and end_balance is not None:
+                start_balance = start_balance.replace("\n", "")
+                end_balance = end_balance.replace("\n", "")
                 return (start_balance, end_balance)
             try:
                 error_node = self.driver.find_element(
@@ -303,13 +353,20 @@ def fetch_history(self):
 
         while start_date <= self.latest_history_date:
             end_date = min(self.latest_history_date,
-                           start_date + datetime.timedelta(days=89))
+                           self.last_day_of_month(start_date))
             self.fetch_statement(start_date, end_date)
             start_date = end_date + datetime.timedelta(days=1)
 
             logger.debug('Venmo hack: waiting 5 seconds between requests')
             time.sleep(5)
 
+
+    def last_day_of_month(self, any_day):
+        # Day 28 exists in every month, and 28 + 4 days always lands in the following month.
+        in_next_month = any_day.replace(day=28) + datetime.timedelta(days=4)
+        # Stepping back by that date's day-of-month yields the last day of any_day's month.
+        return in_next_month - datetime.timedelta(days=in_next_month.day)
+
     def run(self):
         self.login()
         self.fetch_history()

From 6720650d5ee74bd806903e6e585eab2427a9e626 Mon Sep 17 00:00:00 2001
From: Jonathan Klabunde Tomer <jonathantomer@gmail.com>
Date: Tue, 16 Apr 2024 20:22:19 -0700
Subject: [PATCH 3/5] amazon: update css selector for transaction history
 window dropdown (#97)

---
 finance_dl/amazon.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/finance_dl/amazon.py b/finance_dl/amazon.py
index d6340dc..a68d76f 100644
--- a/finance_dl/amazon.py
+++ b/finance_dl/amazon.py
@@ -445,9 +445,7 @@ def retrieve_all_order_groups():
             order_select_index = 0
 
             while True:
-                (order_filter,), = self.wait_and_return(
-                    lambda: self.find_visible_elements(By.XPATH, '//select[@name="orderFilter"]')
-                )
+                order_filter, = self.wait_and_locate((By.CSS_SELECTOR, '#time-filter, #orderFilter'))
                 order_select = Select(order_filter)
                 num_options = len(order_select.options)
                 if order_select_index >= num_options:

From dca4505b7354f689d33ce2cf0d31e9e9ad4c364c Mon Sep 17 00:00:00 2001
From: Jonathan Klabunde Tomer <jonathantomer@gmail.com>
Date: Tue, 16 Apr 2024 20:23:06 -0700
Subject: [PATCH 4/5] fix: ignore `.com.google.Chrome.*` files in download dir
 (#96)

It seems Chrome now writes these files during in-progress downloads, which
confuses scrape_lib's detection of the download being finished and results in
truncated downloads.
---
 finance_dl/scrape_lib.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/finance_dl/scrape_lib.py b/finance_dl/scrape_lib.py
index fa4ca58..cfb4e1d 100644
--- a/finance_dl/scrape_lib.py
+++ b/finance_dl/scrape_lib.py
@@ -195,7 +195,7 @@ def get_downloaded_file(self):
         partial_names = []
         other_names = []
         for name in names:
-            if name.endswith('.part') or name.endswith('.crdownload'):
+            if name.endswith('.part') or name.endswith('.crdownload') or name.startswith('.com.google.Chrome'):
                 partial_names.append(name)
             else:
                 other_names.append(name)

From a87f8f71bea5498fd20e72c78a308a79723f3f03 Mon Sep 17 00:00:00 2001
From: Jonathan Klabunde Tomer <jonathantomer@gmail.com>
Date: Tue, 16 Apr 2024 20:25:04 -0700
Subject: [PATCH 5/5] minor improvements to PG&E scraper (#95)

* actually skip downloads when skipping them
* pge: use mobile site for login; it seems to work more reliably
* more PG&E fixes:
* don't wait for page reload after entering credentials; website is now an SPA
and does not reload
* use CSS selectors rather than link text to find billing links for
significant speedup
---
 finance_dl/pge.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/finance_dl/pge.py b/finance_dl/pge.py
index dcf2a8f..689390d 100644
--- a/finance_dl/pge.py
+++ b/finance_dl/pge.py
@@ -104,15 +104,15 @@ def login(self):
         if self.logged_in:
             return
         logger.info('Initiating log in')
-        self.driver.get('https://www.pge.com/en/myhome/myaccount/index.page')
+        self.driver.get('https://m.pge.com/')
 
         (username, password), = self.wait_and_return(
             self.find_username_and_password_in_any_frame)
         logger.info('Entering username and password')
         username.send_keys(self.credentials['username'])
         password.send_keys(self.credentials['password'])
-        with self.wait_for_page_load():
-            password.send_keys(Keys.ENTER)
+        password.send_keys(Keys.ENTER)
+        self.wait_and_return(lambda: self.find_visible_elements(By.ID, 'arrowBillPaymentHistory'))
         logger.info('Logged in')
         self.logged_in = True
 
@@ -136,7 +136,7 @@ def process_download(self, download_result, output_dir):
             new_path = self.get_output_path(output_dir, date)
             if os.path.exists(new_path):
                 logger.info('Skipping duplicate download: %s', date)
-                return True
+                return False
             tmp_path = new_path.replace('.pdf', '.tmp.pdf')
             with open(tmp_path, 'wb') as f:
                 download_data = download_result[1]
@@ -157,15 +157,11 @@ def get_bills(self, output_dir):
         actions.send_keys(Keys.ESCAPE)
         actions.perform()
         logger.info('Looking for download link')
-        (bills_link, ), = self.wait_and_return(
-            lambda: self.find_visible_elements_by_descendant_partial_text('BILL & PAYMENT HISTORY', 'h2'))
+        (bills_link, ), = self.wait_and_return(lambda: self.find_visible_elements(By.ID, 'arrowBillPaymentHistory'))
         scrape_lib.retry(lambda: self.click(bills_link), retry_delay=2)
-        (more_link, ), = self.wait_and_return(
-            lambda: self.find_visible_elements_by_descendant_partial_text('View up to 24 months of activity', 'a'))
+        (more_link, ), = self.wait_and_return(lambda: self.find_visible_elements(By.ID, 'href-view-24month-history'))
         scrape_lib.retry(lambda: self.click(more_link), retry_delay=2)
-        links, = self.wait_and_return(
-            lambda: self.find_visible_elements(By.PARTIAL_LINK_TEXT, "View Bill PDF")
-        )
+        links, = self.wait_and_return(lambda: self.find_visible_elements(By.CSS_SELECTOR, ".utag-bill-history-view-bill-pdf"))
 
         for link in links:
             if not self.do_download_from_link(link, output_dir) and self.stop_early: