Skip to content
This repository was archived by the owner on Jun 23, 2024. It is now read-only.

Commit 5289289

Browse files
committed
added total comp
1 parent 9d3829f commit 5289289

File tree

6 files changed

+124
-56
lines changed

6 files changed

+124
-56
lines changed

data/meta_info.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"totalPosts": 6670, "totalPostsFromIndia": 3592, "lastUpdated": "2022/02/11 14:43:07", "top20Companies": [["Amazon", 294], ["Microsoft", 157], ["Oracle", 109], ["Goldman Sachs", 69], ["Google", 67], ["Paytm", 66], ["Flipkart", 65], ["Walmart", 63], ["Ola", 62], ["Oyo", 52], ["Paypal", 46], ["Wipro", 45], ["Infosys", 41], ["Swiggy", 38], ["Adobe", 37], ["Salesforce", 36], ["Uber", 35], ["Vmware", 35], ["Tcs", 33], ["Arcesium", 31]], "mostOffersInLastMonth": [["Amazon", 18], ["Microsoft", 15], ["Google", 12], ["Walmart", 9], ["Wipro", 9], ["Paytm", 8], ["Oracle", 8], ["Flipkart", 8], ["Cognizant", 8], ["Goldman Sachs", 5]]}
1+
{"totalPosts": 6710, "totalPostsFromIndia": 3617, "totalPostsWithTotalComp": 2153, "lastUpdated": "2022/02/11 17:47:12", "top20Companies": [["Amazon", 294], ["Microsoft", 158], ["Oracle", 110], ["Google", 70], ["Goldman Sachs", 69], ["Flipkart", 67], ["Paytm", 66], ["Walmart", 63], ["Ola", 62], ["Oyo", 52], ["Paypal", 47], ["Wipro", 46], ["Infosys", 41], ["Adobe", 38], ["Swiggy", 38], ["Salesforce", 36], ["Uber", 35], ["Vmware", 35], ["Tcs", 33], ["Arcesium", 31]], "mostOffersInLastMonth": [["Amazon", 18], ["Microsoft", 16], ["Google", 15], ["Flipkart", 10], ["Wipro", 10], ["Oracle", 9], ["Walmart", 9], ["Paytm", 8], ["Cognizant", 8], ["Accenture", 6]]}

index.html

+3-2
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,9 @@ <h1 id="leetComp" class="col">💸 LeetComp 💸</h1>
8888
<th scope="col" style="width:25%">Role</th>
8989
<th scope="col" style="width:10%">Yoe <span class="sortButton" onclick="sortBySalary(this)"
9090
id="cleanYoe"></span></th>
91-
<th scope="col" style="width:14%">Salary (base) <span class="sortButton"
92-
onclick="sortBySalary(this)" id="cleanSalary"></span></th>
91+
<th scope="col" style="width:14%">Salary <span class="sortButton" onclick="sortBySalary(this)"
92+
id="cleanSalary"></span> <span class="sortButton" onclick="sortBySalary(this)"
93+
id="cleanSalaryTotal"></span></th>
9394
<th scope="col" style="width:10%">Date <span class="sortButton" onclick="sortBySalary(this)"
9495
id="date"></span></th>
9596
<th scope="col" style="width:6%">Views <span class="sortButton" onclick="sortBySalary(this)"

js/data.js

+3-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

js/scripts.js

+58-29
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,18 @@ var data = [];
33

// Data ix and key (we dropped the keys to reduce data size and save network cost).
// Each post is stored as a plain array; keyMap gives the column position of every
// field so callers can write row[keyMap["cleanSalary"]] instead of a magic index.
// Declared with `var` (like the constants below) so the name is a real binding
// rather than an assignment to an undeclared identifier.
var keyMap = {
    "id": 0, "voteCount": 1, "viewCount": 2, "date": 3, "company": 4, "role": 5,
    "cleanYoe": 6, "cleanSalary": 7, "yrOrPm": 8, "cleanSalaryTotal": 9, "cleanCompany": 10
};
109

1110
// Constants
12-
var pageSize = 25;
11+
var pageSize = 20;
1312
var nPages = Math.ceil(data.length / pageSize);
1413

1514
function setFullTimeOrInternship(yrOrPm) {
1615
window.data = [];
1716
for (i = 0; i < allData.length; i++) {
18-
if (allData[i][13] == yrOrPm) {
17+
if (allData[i][keyMap["yrOrPm"]] == yrOrPm) {
1918
window.data.push(allData[i]);
2019
}
2120
}
@@ -32,27 +31,46 @@ var tableTbodyRef = document.getElementById("postInfo").getElementsByTagName("tb
/**
 * Collect the base salary of every row currently held in `data`.
 *
 * Values are divided by 100000 (one lakh) so the chart axis can show "lpa".
 *
 * @returns {number[]} one scaled salary per row of `data`, in row order
 */
function getAllBaseSalaries() {
    var salaryIx = keyMap["cleanSalary"];
    var salaries = [];
    // `var i` keeps the counter local; an undeclared `i` would be shared with
    // every other loop in the file through the global object.
    for (var i = 0; i < data.length; i++) {
        salaries.push(data[i][salaryIx] / 100000);
    }
    return salaries;
}
38+
39+
function getAllTotalSalaries() {
40+
var salaries = [];
41+
for (i = 0; i < data.length; i++) {
42+
if (data[i][keyMap["cleanSalaryTotal"]] != -1) {
43+
salaries.push(data[i][keyMap["cleanSalaryTotal"]] / 100000);
44+
}
3645
}
3746
return salaries;
3847
}
3948

/**
 * Draw the salary histogram into the "salaryBarChart" element.
 *
 * Two semi-transparent histogram traces are plotted together: base salaries
 * (green) and total compensation (red). Both series come from the rows
 * currently in `data`, via getAllBaseSalaries() / getAllTotalSalaries().
 */
function plotSalaryBarChartData() {
    // Local bindings: the series are only needed to build the traces, so they
    // are not written to the global object.
    var baseSalaries = getAllBaseSalaries();
    var totalSalaries = getAllTotalSalaries();
    var baseTrace = {
        x: baseSalaries,
        name: "base",
        type: "histogram",
        opacity: 0.5,
        marker: { color: "green" }
    };
    var totalTrace = {
        x: totalSalaries,
        name: "total",
        type: "histogram",
        opacity: 0.5,
        marker: { color: "red" }
    };
    var layout = {
        title: { text: "# salaries #", font: { size: 12 } },
        height: 400,
        margin: { t: 20, l: 0, r: 0 },
        yaxis: { automargin: true },
        xaxis: { tickprefix: "₹ ", ticksuffix: " lpa" }
    };
    Plotly.newPlot("salaryBarChart", [baseTrace, totalTrace], layout);
}
5876
plotSalaryBarChartData();
@@ -85,20 +103,20 @@ plotTopCompaniesChartData();
85103
function plotSalaryYoeBinsChart() {
86104
var yoeBin1 = []; var yoeBin2 = []; var yoeBin3 = []; var yoeBin4 = []; var yoeBin5 = [];
87105
for (i = 0; i < data.length; i++) {
88-
if (data[i][11] >= 0 && data[i][11] < 1) {
89-
yoeBin1.push(data[i][12]);
106+
if (data[i][keyMap["cleanYoe"]] >= 0 && data[i][keyMap["cleanYoe"]] < 1) {
107+
yoeBin1.push(data[i][keyMap["cleanSalary"]]);
90108
}
91-
else if (data[i][11] >= 1 && data[i][11] < 3) {
92-
yoeBin2.push(data[i][12]);
109+
else if (data[i][keyMap["cleanYoe"]] >= 1 && data[i][keyMap["cleanYoe"]] < 3) {
110+
yoeBin2.push(data[i][keyMap["cleanSalary"]]);
93111
}
94-
else if (data[i][11] >= 3 && data[i][11] < 6) {
95-
yoeBin3.push(data[i][12]);
112+
else if (data[i][keyMap["cleanYoe"]] >= 3 && data[i][keyMap["cleanYoe"]] < 6) {
113+
yoeBin3.push(data[i][keyMap["cleanSalary"]]);
96114
}
97-
else if (data[i][11] >= 6 && data[i][11] < 9) {
98-
yoeBin4.push(data[i][12]);
115+
else if (data[i][keyMap["cleanYoe"]] >= 6 && data[i][keyMap["cleanYoe"]] < 9) {
116+
yoeBin4.push(data[i][keyMap["cleanSalary"]]);
99117
}
100-
else if (data[i][11] >= 9) {
101-
yoeBin5.push(data[i][12]);
118+
else if (data[i][keyMap["cleanYoe"]] >= 9) {
119+
yoeBin5.push(data[i][keyMap["cleanSalary"]]);
102120
}
103121
}
104122
var trace1 = {
@@ -156,19 +174,29 @@ function getFormattedYoe(yoe) {
156174
}
157175
}
158176

/**
 * Format a total-compensation value for the posts table.
 *
 * @param {number} totalSalary total compensation, or -1 when the post has none
 * @returns {string} an HTML snippet: a red "n/a" button when there is no
 *     usable value, otherwise "₹ " followed by the en-IN formatted number
 */
function getFormattedTotalSalary(totalSalary) {
    // A missing value (null/undefined) is treated like the -1 sentinel instead
    // of throwing on `.toLocaleString`. Number() keeps the original loose
    // match, so a "-1" string still counts as "no value".
    if (totalSalary == null || Number(totalSalary) === -1) {
        return "<button class='btn-danger'>n/a</button>";
    }
    return "₹ " + totalSalary.toLocaleString("en-IN");
}
185+
/**
 * Render rows [startIndex, endIndex) of `data` into the postInfo table body.
 *
 * endIndex is clamped to data.length, so asking for a page past the end simply
 * renders fewer (or zero) rows. Column order: company, role, yoe, salary
 * (base + total), date, views, votes, id.
 *
 * NOTE(review): cell values are concatenated into HTML and assigned through
 * innerHTML without escaping. Confirm that company/role text coming from the
 * data pipeline cannot contain markup.
 */
function updatePostsTableContent(startIndex, endIndex) {
    var lastIndex = Math.min(data.length, endIndex);
    var wrapCell = function (cell) {
        return "<td>" + cell + "</td>";
    };
    var rowsHtml = [];
    for (var rowIx = startIndex; rowIx < lastIndex; rowIx++) {
        var post = data[rowIx];
        var cells = [
            post[keyMap["company"]],
            post[keyMap["role"]].toLowerCase(),
            getFormattedYoe(post[keyMap["cleanYoe"]]),
            "base: ₹ " + post[keyMap["cleanSalary"]].toLocaleString("en-IN")
                + "<br>total: " + getFormattedTotalSalary(post[keyMap["cleanSalaryTotal"]]),
            post[keyMap["date"]],
            post[keyMap["viewCount"]],
            post[keyMap["voteCount"]],
            post[keyMap["id"]]
        ];
        rowsHtml.push("<tr>" + cells.map(wrapCell).join("") + "</tr>");
    }
    tableTbodyRef.innerHTML = rowsHtml.join("");
};
@@ -249,14 +277,14 @@ function filterSearchIndexes(ixs) {
249277
window.data = [];
250278
if (document.getElementById("fullTimeButton").classList.contains("active")) {
251279
for (i = 0; i < ixs.length; i++) {
252-
if (allData[ixs[i]][13] == "yearly") {
280+
if (allData[ixs[i]][keyMap["yrOrPm"]] == "yearly") {
253281
window.data.push(allData[ixs[i]]);
254282
}
255283
}
256284
}
257285
else if (document.getElementById("internshipButton").classList.contains("active")) {
258286
for (i = 0; i < ixs.length; i++) {
259-
if (allData[ixs[i]][13] == "monthly") {
287+
if (allData[ixs[i]][keyMap["yrOrPm"]] == "monthly") {
260288
window.data.push(allData[ixs[i]]);
261289
}
262290
}
@@ -311,7 +339,7 @@ function _yoeFilter(e) {
311339
}
312340
window.data = [];
313341
for (i = 0; i < allData.length; i++) {
314-
yoe = parseFloat(allData[i][11]);
342+
yoe = parseFloat(allData[i][keyMap["cleanYoe"]]);
315343
if (yoe >= minYoe && yoe <= maxYoe) {
316344
window.data.push(allData[i]);
317345
}
@@ -331,6 +359,7 @@ for (i = 0; i < metaInfo["mostOffersInLastMonth"].length; i++) {
331359
// Stats
332360
document.getElementById("stats").innerHTML = "Total Posts: " + metaInfo["totalPosts"]
333361
+ " | Posts from India: " + metaInfo["totalPostsFromIndia"]
362+
+ " | Posts with Total Comp: " + metaInfo["totalPostsWithTotalComp"]
334363
+ " | Last updated: " + metaInfo["lastUpdated"]
335364

336365

leetcomp/ner_heuristic.py

+59-21
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212

1313
BASE_SALARY_RANGE_INDIA = (2_00_000, 100_00_000)
14+
TOTAL_SALARY_RANGE_INDIA = (2_00_000, 200_00_000)
15+
TOTAL_TO_BASE_MAX_RATIO = 2.5
1416
INTERN_SALARY_RANGE_INDIA = (10_000, 2_00_000)
1517

1618
LABEL_SPECIFICATION = {
@@ -24,7 +26,7 @@
2426
"RE_SALARY": re.compile(r"(salary|base|base pay)\s?[:-]-?\s?(?P<label>[\w\,\₹\$\.\/\-\(\)\`\\u20b9&#8377;\~ ]+)"),
2527
"RE_LOCATION": re.compile(r"location\s?[:-]-?\s?(?P<label>[\w\,\` ]+)"),
2628
"RE_SALARY_TOTAL": re.compile(
27-
r"\ntot?al (1st year\s)?(comp[e|a]nsation|comp|ctc)(\sfor 1st year)?(\s?\(\s?(salary|base).+?\))?(?P<label>.+)"
29+
r"\\ntot?al (1st year\s)?(comp[e|a]nsation|comp|ctc)(\sfor 1st year)?(\s?\(\s?(salary|base).+?\))?(?P<label>.+)"
2830
),
2931
"RE_SALARY_CLEAN_LPA": re.compile(r"(\d{1,3}(\.\d{1,2})?)\s?(lpa|lakh|lac|l)"),
3032
}
@@ -63,16 +65,17 @@ def _find_matches(regex_pattern: Pattern[str], content: str) -> List[str]:
6365

6466

6567
def _get_info_as_flat_list(
66-
companies: List[str], titles: List[str], yoes: List[str], salaries: List[str], info: Dict[str, Any]
68+
companies: List[str], roles: List[str], yoes: List[str], pays: List[str], pays_t: List[str], info: Dict[str, Any]
6769
) -> List[Dict[str, Any]]:
68-
n_info = min([len(companies), len(titles), len(yoes), len(salaries)])
70+
n_info = min([len(companies), len(roles), len(yoes), len(pays)])
6971
expanded_info = []
7072
for _ in range(n_info):
7173
_info = info.copy()
7274
_info["company"] = companies[0]
73-
_info["role"] = titles[0]
75+
_info["role"] = roles[0]
7476
_info["yoe"] = yoes[0]
75-
_info["salary"] = salaries[0]
77+
_info["salary"] = pays[0]
78+
_info["salaryTotal"] = pays_t[0] if pays_t else ""
7679
expanded_info.append(_info)
7780
return expanded_info
7881

@@ -133,6 +136,9 @@ def _report(raw_info: List[Dict[str, Any]]) -> None:
133136
logger.info(f"Posts with Location: {len([r for r in raw_info if 'country' in r])}")
134137
logger.info(f"Posts with YOE: {len([r for r in raw_info if r['cleanYoe'] >= 0])}")
135138
logger.info(f"Posts from India: {len([r for r in raw_info if 'country' in r and r['country'] == 'india'])}")
139+
logger.info(
140+
f"Posts with Total Comp: {len([r for r in raw_info if 'cleanSalaryTotal' in r and r['cleanSalaryTotal'] != -1.0])}"
141+
)
136142

137143

138144
def _is_valid_yearly_base_pay_from_india(base_pay: float):
@@ -143,13 +149,23 @@ def _is_valid_monthly_internship_pay_from_india(base_pay: float):
143149
return base_pay >= INTERN_SALARY_RANGE_INDIA[0] and base_pay <= INTERN_SALARY_RANGE_INDIA[1]
144150

145151

152+
def _is_valid_monthly_total_pay_from_india(base_pay: float):
153+
return base_pay >= TOTAL_SALARY_RANGE_INDIA[0] and base_pay <= TOTAL_SALARY_RANGE_INDIA[1]
154+
155+
146156
def _filter_invalid_salaries(raw_info: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
147157
n_india = 0
148158
n_dropped = 0
149159
filtered_info = []
150160
for r in raw_info:
151161
if "country" in r and r["country"] == "india":
152162
n_india += 1
163+
if "cleanSalaryTotal" in r and r["cleanSalaryTotal"] != -1:
164+
if not _is_valid_monthly_total_pay_from_india(r["cleanSalaryTotal"]):
165+
r["cleanSalaryTotal"] = -1.0
166+
elif r["cleanSalaryTotal"] / r["cleanSalary"] > TOTAL_TO_BASE_MAX_RATIO:
167+
r["cleanSalaryTotal"] = -1.0
168+
153169
if r["yrOrPm"] == "yearly" and not _is_valid_yearly_base_pay_from_india(r["cleanSalary"]):
154170
n_dropped += 1
155171
continue
@@ -162,6 +178,28 @@ def _filter_invalid_salaries(raw_info: List[Dict[str, Any]]) -> List[Dict[str, A
162178
return filtered_info
163179

164180

181+
def _add_clean_yoe_and_salaries(expanded_info: List[Dict[str, Any]], info: Dict[str, Any], title: str) -> None:
182+
for info in expanded_info:
183+
info["cleanYoe"] = _get_clean_yoe(info["yoe"].lower(), _preprocess_text(title).lower(), info["role"].lower())
184+
if "country" in info and info["country"] == "india":
185+
if "\\n" in info["salary"].replace(",", "").lower():
186+
info["cleanSalary"], info["yrOrPm"] = _get_clean_salary_for_india(
187+
info["salary"].replace(",", "").lower().split("\\n")[0]
188+
)
189+
else:
190+
info["cleanSalary"], info["yrOrPm"] = _get_clean_salary_for_india(
191+
info["salary"].replace(",", "").lower()
192+
)
193+
if info["yrOrPm"] == "yearly":
194+
total_salary, _ = _get_clean_salary_for_india(
195+
info["salaryTotal"].replace(",", "").lower().split("\\n")[0]
196+
)
197+
if info["cleanSalary"] != -1 and total_salary > info["cleanSalary"]:
198+
info["cleanSalaryTotal"] = total_salary
199+
else:
200+
info["cleanSalaryTotal"] = -1
201+
202+
165203
def _get_clean_company_text(company: str) -> str:
166204
return " ".join(re.findall(r"\w+", company.lower()))
167205

@@ -179,6 +217,14 @@ def _add_clean_companies(raw_info: List[Dict[str, Any]]) -> None:
179217
r["cleanCompany"] = " ".join([txt.capitalize() for txt in clean_company.split(" ")])
180218

181219

220+
def _drop_info(raw_info: List[Dict[str, Any]]) -> None:
221+
for r in raw_info:
222+
try:
223+
del r["title"], r["yoe"], r["salary"], r["salaryTotal"], r["city"], r["country"]
224+
except KeyError:
225+
continue
226+
227+
182228
def _save_raw_info(raw_info: List[Dict[str, Any]]) -> None:
183229
with open("data/posts_info.json", "w") as f:
184230
json.dump(raw_info, f)
@@ -197,6 +243,9 @@ def _save_meta_info(total_posts: int, raw_info: List[Dict[str, Any]]) -> Dict[st
197243
meta_info = {
198244
"totalPosts": total_posts,
199245
"totalPostsFromIndia": len([r for r in raw_info if "country" in r and r["country"] == "india"]),
246+
"totalPostsWithTotalComp": len(
247+
[r for r in raw_info if "cleanSalaryTotal" in r and r["cleanSalaryTotal"] != -1.0]
248+
),
200249
"lastUpdated": datetime.now().strftime("%Y/%m/%d %H:%M:%S"),
201250
"top20Companies": top_20,
202251
"mostOffersInLastMonth": most_offers,
@@ -229,39 +278,28 @@ def parse_posts_and_save_tagged_info() -> None:
229278
roles = _find_matches(LABEL_SPECIFICATION["RE_ROLE"], clean_content)
230279
yoes = _find_matches(LABEL_SPECIFICATION["RE_YOE"], clean_content)
231280
salaries = _find_matches(LABEL_SPECIFICATION["RE_SALARY"], clean_content)
281+
total_salaies = _find_matches(LABEL_SPECIFICATION["RE_SALARY_TOTAL"], clean_content)
232282
if companies and roles and yoes and salaries:
233-
expanded_info = _get_info_as_flat_list(companies, roles, yoes, salaries, info)
283+
expanded_info = _get_info_as_flat_list(companies, roles, yoes, salaries, total_salaies, info)
234284
location = _get_clean_location(_preprocess_text(r.title), clean_content)
235285
if location[1]:
236286
for info in expanded_info:
237287
info["city"] = location[0]; info["country"] = location[1]
238-
for info in expanded_info:
239-
info["cleanYoe"] = _get_clean_yoe(
240-
info["yoe"].lower(), _preprocess_text(r.title).lower(), info["role"].lower()
241-
)
242-
if "country" in info and info["country"] == "india":
243-
if "\\n" in info["salary"].replace(",", "").lower():
244-
info["cleanSalary"], info["yrOrPm"] = _get_clean_salary_for_india(
245-
info["salary"].replace(",", "").lower().split("\\n")[0]
246-
)
247-
else:
248-
info["cleanSalary"], info["yrOrPm"] = _get_clean_salary_for_india(
249-
info["salary"].replace(",", "").lower()
250-
)
288+
_add_clean_yoe_and_salaries(expanded_info, info, r.title)
251289
raw_info += expanded_info
252290
else:
253291
n_dropped += 1
254292
# fmt: on
255-
256293
logger.info(f"Total posts: {total_posts}")
257294
logger.info(f"N posts dropped (missing data): {n_dropped}")
258295
_report(raw_info)
259296
raw_info = _filter_invalid_salaries(raw_info)
260297

261298
_add_clean_companies(raw_info)
262299
raw_info = sorted(raw_info, key=lambda x: x["date"], reverse=True)
263-
_save_raw_info(raw_info)
264300
meta_info = _save_meta_info(total_posts, raw_info)
301+
_drop_info(raw_info)
302+
_save_raw_info(raw_info)
265303
_update_data_in_js(raw_info, meta_info)
266304

267305

posts.db

40 KB
Binary file not shown.

0 commit comments

Comments
 (0)