Skip to content

Commit 131d9a6

Browse files
authored
Merge pull request #313 from anushkasaxena07/main
Analyze the increase in popularity of a language in the current year …
2 parents 4a8299b + b2dc545 commit 131d9a6

File tree

1 file changed

+48
-12
lines changed

1 file changed

+48
-12
lines changed

Stackoverflow_Survey_Analysis.ipynb

+48-12
Original file line numberDiff line numberDiff line change
@@ -21899,25 +21899,61 @@
2189921899
]
2190021900
},
2190121901
{
21902-
"cell_type": "code",
21903-
"execution_count": null,
21904-
"metadata": {},
21905-
"outputs": [],
21906-
"source": []
21907-
},
21908-
{
21909-
"cell_type": "code",
21910-
"execution_count": null,
21902+
"cell_type": "markdown",
2191121903
"metadata": {},
21912-
"outputs": [],
21913-
"source": []
21904+
"source": [
21905+
"# Analyze the increase in popularity of a language in the current year due to developers’ interest in the previous year (#301)"
21906+
]
2191421907
},
2191521908
{
2191621909
"cell_type": "code",
2191721910
"execution_count": null,
2191821911
"metadata": {},
2191921912
"outputs": [],
21920-
"source": []
21913+
"source": [
21914+
"import pandas as pd\n",
21915+
"\n",
# Read the 2022 and 2023 survey exports (paths are relative to the notebook's
# working directory), in that order.
file_2022, file_2023 = map(
    pd.read_csv,
    (
        r"Data/survey_results_public_2022.csv",
        r"Data/survey_results_public_2023.csv",
    ),
)
21919+
"\n",
def preprocess_data(df, columns=('LanguageWorkedWith', 'LanguageDesireNextYear',
                                 'LanguageHaveWorkedWith', 'LanguageWantToWorkWith')):
    """Return a copy of ``df`` with missing language answers replaced by ''.

    Parameters
    ----------
    df : pandas.DataFrame
        One survey year. It is NOT modified; a new frame is returned so that
        re-running the cell (or inspecting the raw frame later) is safe.
    columns : iterable of str, optional
        Candidate multi-select language columns. Only the ones actually
        present in ``df`` are filled. The default lists both the legacy names
        and the names the newer exports are expected to use.
        NOTE(review): confirm the exact headers against the CSV files.

    Returns
    -------
    pandas.DataFrame
        Same columns as ``df``; NaN in the matched language columns becomes ''.

    Raises
    ------
    KeyError
        If none of ``columns`` exists in ``df``.
    """
    present = [col for col in columns if col in df.columns]
    if not present:
        raise KeyError(
            f"none of the expected language columns {list(columns)} found in the DataFrame"
        )
    # assign() builds a new frame, leaving the caller's raw data untouched.
    return df.assign(**{col: df[col].fillna('') for col in present})
21925+
"\n",
# Apply the shared cleaning step to each survey year, 2022 first.
data_2022, data_2023 = (
    preprocess_data(raw_frame) for raw_frame in (file_2022, file_2023)
)
21929+
"\n",
def count_languages(df, column):
    """Count how many respondents listed each language in ``column``.

    ``column`` holds semicolon-separated multi-select answers. Each answer is
    split on ';', whitespace around every entry is stripped, and blank entries
    (from missing or empty answers) are discarded so that '' is never reported
    as a language.

    Parameters
    ----------
    df : pandas.DataFrame
        Survey responses.
    column : str
        Name of the semicolon-separated language column.

    Returns
    -------
    pandas.Series
        Counts indexed by language, most frequent first. The Series is named
        ``column`` and its index is unnamed, so ``reset_index()`` yields the
        columns ``'index'`` and ``column`` on any pandas version.
    """
    languages = (
        df[column]
        .fillna('')      # missing answers become blanks, removed below
        .astype(str)     # keeps the .str accessor usable on empty/all-NaN columns
        .str.split(';')
        .explode()
        .str.strip()
    )
    languages = languages[languages != '']

    counts = languages.value_counts()
    # pandas >= 2.0 names the result 'count' and moves the column name onto the
    # index; normalise both so callers see one stable shape.
    counts.name = column
    counts.index.name = None
    return counts
21934+
"\n",
# Candidate column names, in order of preference. The legacy names come first
# so frames that still carry them behave as before.
# NOTE(review): the 2022/2023 exports are expected to use the
# 'LanguageHaveWorkedWith' / 'LanguageWantToWorkWith' headers; confirm against the CSVs.
WORKED_WITH_COLUMNS = ('LanguageWorkedWith', 'LanguageHaveWorkedWith')
DESIRED_COLUMNS = ('LanguageDesireNextYear', 'LanguageWantToWorkWith')


def _resolve_column(df, candidates):
    """Return the first name in ``candidates`` that is a column of ``df``."""
    for name in candidates:
        if name in df.columns:
            return name
    raise KeyError(f"none of {list(candidates)} found in the survey columns")


def _count_by_language(df, candidates):
    """Per-language counts for whichever candidate column ``df`` provides."""
    counts = count_languages(df, _resolve_column(df, candidates))
    # Blank entries come from respondents who skipped the question.
    return counts[counts.index != '']


def _as_frame(counts, value_name):
    """Turn a counts Series into a two-column frame: 'Language', ``value_name``."""
    # rename_axis + reset_index(name=...) does not depend on how value_counts()
    # names its result, which differs between pandas 1.x and 2.x.
    return counts.rename_axis('Language').reset_index(name=value_name)


# Count languages for both years
lang_count_2022 = _count_by_language(data_2022, WORKED_WITH_COLUMNS)
lang_desire_2022 = _count_by_language(data_2022, DESIRED_COLUMNS)
lang_count_2023 = _count_by_language(data_2023, WORKED_WITH_COLUMNS)

# Convert to DataFrame for easier comparison
lang_count_2022_df = _as_frame(lang_count_2022, 'Count_2022')
lang_desire_2022_df = _as_frame(lang_desire_2022, 'Desire_2022')
lang_count_2023_df = _as_frame(lang_count_2023, 'Count_2023')

# Outer-merge so a language that appears in only one source is kept, with 0
# standing in for "nobody listed it".
merged_df = (
    lang_count_2022_df
    .merge(lang_desire_2022_df, on='Language', how='outer')
    .merge(lang_count_2023_df, on='Language', how='outer')
    .fillna(0)
)

# Calculate the increase in popularity
merged_df['Increase'] = merged_df['Count_2023'] - merged_df['Count_2022']
# Growth per unit of prior-year interest. Where nobody expressed interest the
# ratio is undefined, so leave it as NaN instead of dividing by zero (+/-inf).
desire_2022 = merged_df['Desire_2022'].where(merged_df['Desire_2022'] > 0)
merged_df['Interest_to_Popularity'] = merged_df['Increase'] / desire_2022

# Sort by the increase in popularity
merged_df = merged_df.sort_values(by='Increase', ascending=False)

# Last expression of the cell: rendered as a rich table by Jupyter.
merged_df[['Language', 'Count_2022', 'Desire_2022', 'Count_2023', 'Increase', 'Interest_to_Popularity']]
21956+
]
2192121957
},
2192221958
{
2192321959
"cell_type": "markdown",

0 commit comments

Comments
 (0)