@@ -21899,25 +21899,61 @@
21899
21899
]
21900
21900
},
21901
21901
{
21902
- "cell_type": "code",
21903
- "execution_count": null,
21904
- "metadata": {},
21905
- "outputs": [],
21906
- "source": []
21907
- },
21908
- {
21909
- "cell_type": "code",
21910
- "execution_count": null,
21902
+ "cell_type": "markdown",
21911
21903
"metadata": {},
21912
- "outputs": [],
21913
- "source": []
21904
+ "source": [
21905
+ "# Analyze the increase in popularity of a language in the current year due to developers’ interest in the previous year (#301)"
21906
+ ]
21914
21907
},
21915
21908
{
21916
21909
"cell_type": "code",
21917
21910
"execution_count": null,
21918
21911
"metadata": {},
21919
21912
"outputs": [],
21920
- "source": []
21913
+ "source": [
21914
+ "import pandas as pd\n",
21915
+ "\n",
# Read the 2022 and 2023 survey result CSVs into DataFrames (paths are
# relative to the notebook's working directory).
file_2022, file_2023 = (
    pd.read_csv(csv_path)
    for csv_path in (
        r"Data/survey_results_public_2022.csv",
        r"Data/survey_results_public_2023.csv",
    )
)
21919
+ "\n",
def preprocess_data(df, columns=('LanguageWorkedWith', 'LanguageDesireNextYear')):
    """Return a copy of ``df`` with missing values in ``columns`` replaced by ''.

    The input frame is left untouched so the raw data loaded from disk stays
    available and re-running the cell gives the same result.

    Parameters
    ----------
    df : pandas.DataFrame
        Survey responses; must contain every column named in ``columns``.
    columns : iterable of str, optional
        Columns whose missing values are filled with the empty string.
        Defaults to the two language columns this analysis reads.
        NOTE(review): confirm these names match the headers of the CSVs
        being loaded - the survey schema is not visible from this cell.

    Returns
    -------
    pandas.DataFrame
        A new frame; all other columns are copied unchanged.

    Raises
    ------
    KeyError
        If any of ``columns`` is absent from ``df``.
    """
    # Work on a copy: assigning into ``df`` directly would also rewrite the
    # caller's DataFrame.
    cleaned = df.copy()
    for col in columns:
        cleaned[col] = cleaned[col].fillna('')
    return cleaned
21925
+ "\n",
# Run the missing-value cleanup on each year's survey frame, 2022 first.
data_2022, data_2023 = map(preprocess_data, (file_2022, file_2023))
21929
+ "\n",
def count_languages(df, column):
    """Count how often each language appears in a ';'-separated column.

    Each cell of ``df[column]`` is split on ';', the pieces are stripped of
    surrounding whitespace, and the occurrences of every distinct piece are
    tallied across all rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Name of the column holding ';'-separated language lists.

    Returns
    -------
    pandas.Series
        Counts indexed by language, most frequent first (the ordering of
        ``Series.value_counts``).

    Raises
    ------
    KeyError
        If ``column`` is not in ``df``.
    """
    # Drop missing cells up front; astype(str) keeps the .str accessor usable
    # even when nothing is left (e.g. an all-NaN column).
    answers = df[column].dropna().astype(str)
    languages = answers.str.split(';').explode().str.strip()
    # Blank answers and stray/trailing ';' yield empty tokens - without this
    # filter they would be counted as a language named ''.
    languages = languages[languages != '']
    return languages.value_counts()
21934
+ "\n",
# Count languages for both years: what respondents worked with in 2022 and
# 2023, and what 2022 respondents said they wanted to use the following year.
lang_count_2022 = count_languages(data_2022, 'LanguageWorkedWith')
lang_desire_2022 = count_languages(data_2022, 'LanguageDesireNextYear')
lang_count_2023 = count_languages(data_2023, 'LanguageWorkedWith')

# Convert each count Series to a two-column DataFrame.
# rename_axis + reset_index(name=...) labels both columns explicitly rather
# than relying on the default labels of value_counts().reset_index(), which
# changed between pandas 1.x ('index', <column>) and 2.x (<column>, 'count').
lang_count_2022_df = lang_count_2022.rename_axis('Language').reset_index(name='Count_2022')
lang_desire_2022_df = lang_desire_2022.rename_axis('Language').reset_index(name='Desire_2022')
lang_count_2023_df = lang_count_2023.rename_axis('Language').reset_index(name='Count_2023')

# Outer-merge so a language present in only some of the three tallies is
# kept; its missing counts become 0.
merged_df = (
    lang_count_2022_df
    .merge(lang_desire_2022_df, on='Language', how='outer')
    .merge(lang_count_2023_df, on='Language', how='outer')
    .fillna(0)
)

# Calculate the increase in popularity
merged_df['Increase'] = merged_df['Count_2023'] - merged_df['Count_2022']
# Ratio of the usage change to prior-year interest. Languages with zero
# recorded interest have no meaningful ratio, so the denominator is masked to
# NaN instead of letting the division produce +/-inf.
merged_df['Interest_to_Popularity'] = (
    merged_df['Increase']
    / merged_df['Desire_2022'].where(merged_df['Desire_2022'] != 0)
)

# Sort by the increase in popularity
merged_df = merged_df.sort_values(by='Increase', ascending=False)

print(merged_df[['Language', 'Count_2022', 'Desire_2022', 'Count_2023', 'Increase', 'Interest_to_Popularity']])
21956
+ ]
21921
21957
},
21922
21958
{
21923
21959
"cell_type": "markdown",
0 commit comments