brianleect
diff --git a/.gitignore
+2-1 b/.gitignore
+2-1
diff --git a/combined/combinedLabels.json combined/combinedAccountLabels.json b/combined/combinedLabels.json combined/combinedAccountLabels.json
diff --git a/combined/combinedAllLabels.json
+1 b/combined/combinedAllLabels.json
+1
diff --git a/combined/combinedTokenLabels.json
+1 b/combined/combinedTokenLabels.json
+1
diff --git a/main.py
+72-40 b/main.py
+72-40
diff --git a/simpleCombineAllJson.py
+1-21 b/simpleCombineAllJson.py
+1-21
@@ -1,2 +1,3 @@
 config.json
-old
+old
+*.pyc
@@ -11,56 +11,59 @@ def login():
     driver.get('https://etherscan.io/login')
     driver.implicitly_wait(5)
     driver.find_element("id",
-        "ContentPlaceHolder1_txtUserName").send_keys(config['ETHERSCAN_USER'])
+                        "ContentPlaceHolder1_txtUserName").send_keys(config['ETHERSCAN_USER'])
     driver.find_element(
-        "id","ContentPlaceHolder1_txtPassword").send_keys(config['ETHERSCAN_PASS'])
+        "id", "ContentPlaceHolder1_txtPassword").send_keys(config['ETHERSCAN_PASS'])
 
     input("Press enter once logged in")
 
 
 # Retrieve label information and saves as JSON/CSV
 def getLabel(label, label_type="account", input_type='single'):
-    baseUrl = 'https://etherscan.io/{}s/label/{}?subcatid={}&size=100&start={}' # https://etherscan.io/tokens/label/gaming?subcatid=undefined&size=100&start=0
+    baseUrl = 'https://etherscan.io/{}s/label/{}?subcatid={}&size=100&start={}'
     index = 0  # Initialize start index at 0
     table_list = []
 
-    driver.get(baseUrl.format(label_type, label, 'undefined',index))
+    driver.get(baseUrl.format(label_type, label, 'undefined', index))
     driver.implicitly_wait(5)
-  
+
     # Find all elements using driver.find_elements where class matches "nav-link"
     # This is used to find all subcategories
-    elems = driver.find_elements("class name","nav-link")
+    elems = driver.find_elements("class name", "nav-link")
     subcat_id_list = []
 
     # Loop through elems and append all values to subcat_id_list
     for elem in elems:
         elemVal = elem.get_attribute("val")
-        #print(elem.text,elemVal,elem.get_attribute("class")) # Used for debugging elements returned
-        if (elemVal is not None): subcat_id_list.append(elemVal)
+        # print(elem.text,elemVal,elem.get_attribute("class")) # Used for debugging elements returned
+        if (elemVal is not None):
+            subcat_id_list.append(elemVal)
 
-    print(label,'subcat_values:',subcat_id_list)
+    print(label, 'subcat_values:', subcat_id_list)
 
     # Bug fix: When there's 0 subcat id found aka ONLY MAIN, we manually add 'undefined' to subcat_id_list
-    if (len(subcat_id_list) == 0): subcat_id_list.append('undefined')
+    if (len(subcat_id_list) == 0):
+        subcat_id_list.append('undefined')
 
-    for table_index,subcat_id in enumerate(subcat_id_list):
+    for table_index, subcat_id in enumerate(subcat_id_list):
         index = 0  # Initialize start index at 0
         driver.implicitly_wait(5)
-        driver.get(baseUrl.format(label_type, label, subcat_id,index))
-        time.sleep(5) #TODO: allow customization by args
+        driver.get(baseUrl.format(label_type, label, subcat_id, index))
+        time.sleep(5)  # TODO: allow customization by args
 
         while (True):
-            print('Index:', index,'Subcat:',subcat_id)
+            print('Index:', index, 'Subcat:', subcat_id)
 
             try:
                 # Select relevant table from multiple tables in the page, based on current table index
                 curTable = pd.read_html(driver.page_source)[table_index]
                 if label_type == "account":
-                    curTable = curTable[:-1] # Remove last item which is just sum
+                    # Remove last item which is just sum
+                    curTable = curTable[:-1]
                 print(curTable)
 
                 # Retrieve all addresses from table
-                elems = driver.find_elements("xpath","//tbody//a[@href]")
+                elems = driver.find_elements("xpath", "//tbody//a[@href]")
                 addressList = []
                 addrIndex = len('https://etherscan.io/address/')
                 for elem in elems:
@@ -70,7 +73,7 @@ def getLabel(label, label_type="account", input_type='single'):
 
                 # Replace address column in newTable dataframe with addressList
                 curTable['Address'] = addressList
-            except Exception as e: 
+            except Exception as e:
                 print(e)
                 print(label, "Skipping label due to error")
                 return
@@ -81,17 +84,19 @@ def getLabel(label, label_type="account", input_type='single'):
             if (len(curTable.index) == 100):
                 if label_type == "account":
                     index += 100
-                    driver.get(baseUrl.format(label_type, label, subcat_id,index))
+                    driver.get(baseUrl.format(
+                        label_type, label, subcat_id, index))
                 if label_type == "token":
-                    next_icon_elems = driver.find_elements("class name", "fa-chevron-right")
+                    next_icon_elems = driver.find_elements(
+                        "class name", "fa-chevron-right")
                     next_icon_elems[0].click()
-                time.sleep(5) #TODO: allow customization by args
+                time.sleep(5)  # TODO: allow customization by args
             else:
                 break
 
     df = pd.concat(table_list)  # Combine all dataframes
     df.fillna('', inplace=True)  # Replace NaN as empty string
-    df.index = range(len(df.index)) # Fix index for df
+    df.index = range(len(df.index))  # Fix index for df
 
     # Prints length and save as a csv
     print(label, 'Df length:', len(df.index))
@@ -100,10 +105,10 @@ def getLabel(label, label_type="account", input_type='single'):
     # Save as json object with mapping address:nameTag
     if label_type == "account":
         addressNameDict = dict([(address, nameTag)
-                           for address, nameTag in zip(df.Address, df['Name Tag'])])
+                                for address, nameTag in zip(df.Address, df['Name Tag'])])
     if label_type == "token":
         addressNameDict = dict([(address, nameTag)
-                           for address, nameTag in zip(df.Address, df['Token Name'])])  
+                                for address, nameTag in zip(df.Address, df['Token Name'])])
     with open('{}s/{}.json'.format(label_type, label), 'w', encoding='utf-8') as f:
         json.dump(addressNameDict, f, ensure_ascii=True)
 
@@ -117,7 +122,7 @@ def getLabel(label, label_type="account", input_type='single'):
 
 # Combines all JSON into a single file combinedLabels.json
 def combineAllJson():
-    combinedJSON = {}
+    combinedAccountJSON = {}
 
     # iterating over all files
     for files in os.listdir('./accounts'):
@@ -126,21 +131,45 @@ def combineAllJson():
             with open('./accounts/{}'.format(files)) as f:
                 dictData = json.load(f)
                 for address, nameTag in dictData.items():
-                    if address not in combinedJSON:
-                        combinedJSON[address] = {'name': nameTag, 'labels': []}
-                    combinedJSON[address]['labels'].append(files[:-5])
+                    if address not in combinedAccountJSON:
+                        combinedAccountJSON[address] = {
+                            'name': nameTag, 'labels': []}
+                    combinedAccountJSON[address]['labels'].append(files[:-5])
+        else:
+            continue
+
+    combinedTokenJSON = {}
+    for files in os.listdir('./tokens'):
+        if files.endswith('json'):
+            print(files)  # printing file name of desired extension
+            with open('./tokens/{}'.format(files)) as f:
+                dictData = json.load(f)
+                for address, nameTag in dictData.items():
+                    if address not in combinedTokenJSON:
+                        combinedTokenJSON[address] = {
+                            'name': nameTag, 'labels': []}
+                    combinedTokenJSON[address]['labels'].append(files[:-5])
         else:
             continue
 
-    with open('combined/combinedLabels.json', 'w', encoding='utf-8') as f:
-        json.dump(combinedJSON, f, ensure_ascii=True)
+    # Build the merged address -> {'name', 'labels'} mapping.
+    # A plain {**account, **token} merge replaces the whole entry, so an
+    # address present in both sources would lose its account labels.
+    # Keep the token 'name' on overlap (same precedence as before), but
+    # concatenate both 'labels' lists, dropping duplicates while keeping
+    # first-seen order (dict.fromkeys preserves insertion order).
+    combinedAllJSON = {}
+    for key in set(combinedAccountJSON) | set(combinedTokenJSON):
+        accountEntry = combinedAccountJSON.get(key, {})
+        tokenEntry = combinedTokenJSON.get(key, {})
+        mergedLabels = list(dict.fromkeys(
+            accountEntry.get('labels', []) + tokenEntry.get('labels', [])))
+        combinedAllJSON[key] = {
+            **accountEntry, **tokenEntry, 'labels': mergedLabels}
+
+    with open('combined/combinedAccountLabels.json', 'w', encoding='utf-8') as f:
+        json.dump(combinedAccountJSON, f, ensure_ascii=True)
+    with open('combined/combinedTokenLabels.json', 'w', encoding='utf-8') as f:
+        json.dump(combinedTokenJSON, f, ensure_ascii=True)
+    with open('combined/combinedAllLabels.json', 'w', encoding='utf-8') as f:
+        json.dump(combinedAllJSON, f, ensure_ascii=True)
 
 # Retrieves all labels from labelcloud and saves as JSON/CSV
 def getAllLabels():
     driver.get('https://etherscan.io/labelcloud')
     driver.implicitly_wait(5)
 
-    elems = driver.find_elements("xpath","//a[@href]")
+    elems = driver.find_elements("xpath", "//a[@href]")
     labels = []
     labelIndex = len('https://etherscan.io/accounts/label/')
     for elem in elems:
@@ -174,19 +203,22 @@ def getAllLabels():
     # Proceed to combine all addresses into single JSON after retrieving all.
     combineAllJson()
 
+
 # Large size: Eth2/gnsos , Bugged: Liqui , NoData: Remaining labels
 ignore_list = ['eth2-depositor', 'gnosis-safe-multisig', 'liqui.io', 'education', 'electronics',
-               'flashbots', 'media', 'music', 'network', 'prediction-market', 'real-estate', 'vpn', 'beacon-depositor','uniswap']
+               'flashbots', 'media', 'music', 'network', 'prediction-market', 'real-estate', 'vpn', 'beacon-depositor', 'uniswap']
 with open('config.json', 'r') as f:
     config = json.load(f)
 
-driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
 
-login()
-retrievalType = input('Enter retrieval type (single/all): ')
-if (retrievalType == 'all'):
-    getAllLabels()
-else:
-    singleLabel = input('Enter label of interest: ')
-    label_type = input('Enter label type (account/token): ')
-    getLabel(singleLabel, label_type)
+if __name__ == "__main__":
+    driver = webdriver.Chrome(service=ChromeService(
+        ChromeDriverManager().install()))
+    login()
+    retrievalType = input('Enter retrieval type (single/all): ')
+    if (retrievalType == 'all'):
+        getAllLabels()
+    else:
+        singleLabel = input('Enter label of interest: ')
+        label_type = input('Enter label type (account/token): ')
+        getLabel(singleLabel, label_type)
@@ -1,26 +1,6 @@
 
 import json
 import os.path
-
-# Combines all JSON into a single file combinedLabels.json
-def combineAllJson():
-    combinedJSON = {}
-
-    # iterating over all files
-    for files in os.listdir('./data'):
-        if files.endswith('json'):
-            print(files)  # printing file name of desired extension
-            with open('./data/{}'.format(files)) as f:
-                dictData = json.load(f)
-                for address, nameTag in dictData.items():
-                    if address not in combinedJSON:
-                        combinedJSON[address] = {'name': nameTag, 'labels': []}
-                    combinedJSON[address]['labels'].append(files[:-5])
-        else:
-            continue
-
-    with open('combined/combinedLabels.json', 'w', encoding='utf-8') as f:
-        json.dump(combinedJSON, f, ensure_ascii=True)
-
+from main import combineAllJson 
 
 combineAllJson()
-Original file line number
+Diff line change
 config.json
 -old
 +old
 +*.pyc