@@ -11,56 +11,59 @@ def login():
11
11
driver .get ('https://etherscan.io/login' )
12
12
driver .implicitly_wait (5 )
13
13
driver .find_element ("id" ,
14
- "ContentPlaceHolder1_txtUserName" ).send_keys (config ['ETHERSCAN_USER' ])
14
+ "ContentPlaceHolder1_txtUserName" ).send_keys (config ['ETHERSCAN_USER' ])
15
15
driver .find_element (
16
- "id" ,"ContentPlaceHolder1_txtPassword" ).send_keys (config ['ETHERSCAN_PASS' ])
16
+ "id" , "ContentPlaceHolder1_txtPassword" ).send_keys (config ['ETHERSCAN_PASS' ])
17
17
18
18
input ("Press enter once logged in" )
19
19
20
20
21
21
# Retrieves label information and saves it as JSON/CSV
22
22
def getLabel (label , label_type = "account" , input_type = 'single' ):
23
- baseUrl = 'https://etherscan.io/{}s/label/{}?subcatid={}&size=100&start={}' # https://etherscan.io/tokens/label/gaming?subcatid=undefined&size=100&start=0
23
+ baseUrl = 'https://etherscan.io/{}s/label/{}?subcatid={}&size=100&start={}'
24
24
index = 0 # Initialize start index at 0
25
25
table_list = []
26
26
27
- driver .get (baseUrl .format (label_type , label , 'undefined' ,index ))
27
+ driver .get (baseUrl .format (label_type , label , 'undefined' , index ))
28
28
driver .implicitly_wait (5 )
29
-
29
+
30
30
# Find all elements using driver.find_elements where class matches "nav-link"
31
31
# This is used to find all subcategories
32
- elems = driver .find_elements ("class name" ,"nav-link" )
32
+ elems = driver .find_elements ("class name" , "nav-link" )
33
33
subcat_id_list = []
34
34
35
35
# Loop through elems and append all values to subcat_id_list
36
36
for elem in elems :
37
37
elemVal = elem .get_attribute ("val" )
38
- #print(elem.text,elemVal,elem.get_attribute("class")) # Used for debugging elements returned
39
- if (elemVal is not None ): subcat_id_list .append (elemVal )
38
+ # print(elem.text,elemVal,elem.get_attribute("class")) # Used for debugging elements returned
39
+ if (elemVal is not None ):
40
+ subcat_id_list .append (elemVal )
40
41
41
- print (label ,'subcat_values:' ,subcat_id_list )
42
+ print (label , 'subcat_values:' , subcat_id_list )
42
43
43
44
# Bug fix: when no subcat id is found (i.e. only the main category exists), manually add 'undefined' to subcat_id_list
44
- if (len (subcat_id_list ) == 0 ): subcat_id_list .append ('undefined' )
45
+ if (len (subcat_id_list ) == 0 ):
46
+ subcat_id_list .append ('undefined' )
45
47
46
- for table_index ,subcat_id in enumerate (subcat_id_list ):
48
+ for table_index , subcat_id in enumerate (subcat_id_list ):
47
49
index = 0 # Initialize start index at 0
48
50
driver .implicitly_wait (5 )
49
- driver .get (baseUrl .format (label_type , label , subcat_id ,index ))
50
- time .sleep (5 ) # TODO: allow customization by args
51
+ driver .get (baseUrl .format (label_type , label , subcat_id , index ))
52
+ time .sleep (5 ) # TODO: allow customization by args
51
53
52
54
while (True ):
53
- print ('Index:' , index ,'Subcat:' ,subcat_id )
55
+ print ('Index:' , index , 'Subcat:' , subcat_id )
54
56
55
57
try :
56
58
# Select relevant table from multiple tables in the page, based on current table index
57
59
curTable = pd .read_html (driver .page_source )[table_index ]
58
60
if label_type == "account" :
59
- curTable = curTable [:- 1 ] # Remove last item which is just sum
61
+ # Remove last item which is just sum
62
+ curTable = curTable [:- 1 ]
60
63
print (curTable )
61
64
62
65
# Retrieve all addresses from table
63
- elems = driver .find_elements ("xpath" ,"//tbody//a[@href]" )
66
+ elems = driver .find_elements ("xpath" , "//tbody//a[@href]" )
64
67
addressList = []
65
68
addrIndex = len ('https://etherscan.io/address/' )
66
69
for elem in elems :
@@ -70,7 +73,7 @@ def getLabel(label, label_type="account", input_type='single'):
70
73
71
74
# Replace address column in newTable dataframe with addressList
72
75
curTable ['Address' ] = addressList
73
- except Exception as e :
76
+ except Exception as e :
74
77
print (e )
75
78
print (label , "Skipping label due to error" )
76
79
return
@@ -81,17 +84,19 @@ def getLabel(label, label_type="account", input_type='single'):
81
84
if (len (curTable .index ) == 100 ):
82
85
if label_type == "account" :
83
86
index += 100
84
- driver .get (baseUrl .format (label_type , label , subcat_id ,index ))
87
+ driver .get (baseUrl .format (
88
+ label_type , label , subcat_id , index ))
85
89
if label_type == "token" :
86
- next_icon_elems = driver .find_elements ("class name" , "fa-chevron-right" )
90
+ next_icon_elems = driver .find_elements (
91
+ "class name" , "fa-chevron-right" )
87
92
next_icon_elems [0 ].click ()
88
- time .sleep (5 ) # TODO: allow customization by args
93
+ time .sleep (5 ) # TODO: allow customization by args
89
94
else :
90
95
break
91
96
92
97
df = pd .concat (table_list ) # Combine all dataframes
93
98
df .fillna ('' , inplace = True ) # Replace NaN as empty string
94
- df .index = range (len (df .index )) # Fix index for df
99
+ df .index = range (len (df .index )) # Fix index for df
95
100
96
101
# Print the length and save as a CSV
97
102
print (label , 'Df length:' , len (df .index ))
@@ -100,10 +105,10 @@ def getLabel(label, label_type="account", input_type='single'):
100
105
# Save as json object with mapping address:nameTag
101
106
if label_type == "account" :
102
107
addressNameDict = dict ([(address , nameTag )
103
- for address , nameTag in zip (df .Address , df ['Name Tag' ])])
108
+ for address , nameTag in zip (df .Address , df ['Name Tag' ])])
104
109
if label_type == "token" :
105
110
addressNameDict = dict ([(address , nameTag )
106
- for address , nameTag in zip (df .Address , df ['Token Name' ])])
111
+ for address , nameTag in zip (df .Address , df ['Token Name' ])])
107
112
with open ('{}s/{}.json' .format (label_type , label ), 'w' , encoding = 'utf-8' ) as f :
108
113
json .dump (addressNameDict , f , ensure_ascii = True )
109
114
@@ -117,7 +122,7 @@ def getLabel(label, label_type="account", input_type='single'):
117
122
118
123
# Combines all per-label JSON files into combinedAccountLabels.json, combinedTokenLabels.json and combinedAllLabels.json
119
124
def combineAllJson ():
120
- combinedJSON = {}
125
+ combinedAccountJSON = {}
121
126
122
127
# iterating over all files
123
128
for files in os .listdir ('./accounts' ):
@@ -126,21 +131,45 @@ def combineAllJson():
126
131
with open ('./accounts/{}' .format (files )) as f :
127
132
dictData = json .load (f )
128
133
for address , nameTag in dictData .items ():
129
- if address not in combinedJSON :
130
- combinedJSON [address ] = {'name' : nameTag , 'labels' : []}
131
- combinedJSON [address ]['labels' ].append (files [:- 5 ])
134
+ if address not in combinedAccountJSON :
135
+ combinedAccountJSON [address ] = {
136
+ 'name' : nameTag , 'labels' : []}
137
+ combinedAccountJSON [address ]['labels' ].append (files [:- 5 ])
138
+ else :
139
+ continue
140
+
141
+ combinedTokenJSON = {}
142
+ for files in os .listdir ('./tokens' ):
143
+ if files .endswith ('json' ):
144
+ print (files ) # printing file name of desired extension
145
+ with open ('./tokens/{}' .format (files )) as f :
146
+ dictData = json .load (f )
147
+ for address , nameTag in dictData .items ():
148
+ if address not in combinedTokenJSON :
149
+ combinedTokenJSON [address ] = {
150
+ 'name' : nameTag , 'labels' : []}
151
+ combinedTokenJSON [address ]['labels' ].append (files [:- 5 ])
132
152
else :
133
153
continue
134
154
135
- with open ('combined/combinedLabels.json' , 'w' , encoding = 'utf-8' ) as f :
136
- json .dump (combinedJSON , f , ensure_ascii = True )
155
+ combinedAllJSON = {
156
+ key : { ** combinedAccountJSON .get (key , {}), ** combinedTokenJSON .get (key , {}) }
157
+ for key in set (list (combinedAccountJSON .keys ())+ list (combinedTokenJSON .keys ()))
158
+ }
159
+
160
+ with open ('combined/combinedAccountLabels.json' , 'w' , encoding = 'utf-8' ) as f :
161
+ json .dump (combinedAccountJSON , f , ensure_ascii = True )
162
+ with open ('combined/combinedTokenLabels.json' , 'w' , encoding = 'utf-8' ) as f :
163
+ json .dump (combinedTokenJSON , f , ensure_ascii = True )
164
+ with open ('combined/combinedAllLabels.json' , 'w' , encoding = 'utf-8' ) as f :
165
+ json .dump (combinedAllJSON , f , ensure_ascii = True )
137
166
138
167
# Retrieves all labels from labelcloud and saves as JSON/CSV
139
168
def getAllLabels ():
140
169
driver .get ('https://etherscan.io/labelcloud' )
141
170
driver .implicitly_wait (5 )
142
171
143
- elems = driver .find_elements ("xpath" ,"//a[@href]" )
172
+ elems = driver .find_elements ("xpath" , "//a[@href]" )
144
173
labels = []
145
174
labelIndex = len ('https://etherscan.io/accounts/label/' )
146
175
for elem in elems :
@@ -174,19 +203,22 @@ def getAllLabels():
174
203
# Proceed to combine all addresses into single JSON after retrieving all.
175
204
combineAllJson ()
176
205
206
+
177
207
# Large size: Eth2/Gnosis, Bugged: Liqui, NoData: remaining labels
178
208
ignore_list = ['eth2-depositor' , 'gnosis-safe-multisig' , 'liqui.io' , 'education' , 'electronics' ,
179
- 'flashbots' , 'media' , 'music' , 'network' , 'prediction-market' , 'real-estate' , 'vpn' , 'beacon-depositor' ,'uniswap' ]
209
+ 'flashbots' , 'media' , 'music' , 'network' , 'prediction-market' , 'real-estate' , 'vpn' , 'beacon-depositor' , 'uniswap' ]
180
210
with open ('config.json' , 'r' ) as f :
181
211
config = json .load (f )
182
212
183
- driver = webdriver .Chrome (service = ChromeService (ChromeDriverManager ().install ()))
184
213
185
- login ()
186
- retrievalType = input ('Enter retrieval type (single/all): ' )
187
- if (retrievalType == 'all' ):
188
- getAllLabels ()
189
- else :
190
- singleLabel = input ('Enter label of interest: ' )
191
- label_type = input ('Enter label type (account/token): ' )
192
- getLabel (singleLabel , label_type )
214
+ if __name__ == "__main__" :
215
+ driver = webdriver .Chrome (service = ChromeService (
216
+ ChromeDriverManager ().install ()))
217
+ login ()
218
+ retrievalType = input ('Enter retrieval type (single/all): ' )
219
+ if (retrievalType == 'all' ):
220
+ getAllLabels ()
221
+ else :
222
+ singleLabel = input ('Enter label of interest: ' )
223
+ label_type = input ('Enter label type (account/token): ' )
224
+ getLabel (singleLabel , label_type )
0 commit comments