Skip to content

Commit 726b102

Browse files
authored
Add files via upload
1 parent d973a58 commit 726b102

10 files changed

+3130
-0
lines changed

Code_ANN.ipynb

+331
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 5,
6+
"id": "aab81d47-65e9-40a1-845d-62b661f8a221",
7+
"metadata": {},
8+
"outputs": [
9+
{
10+
"name": "stderr",
11+
"output_type": "stream",
12+
"text": [
13+
"C:\\Users\\36819\\AppData\\Local\\Temp\\ipykernel_44224\\3559580778.py:21: SettingWithCopyWarning: \n",
14+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
15+
"\n",
16+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
17+
" X['Subluxation_percent'].fillna(X['Subluxation_percent'].mean(), inplace=True)\n",
18+
"C:\\Users\\36819\\AppData\\Local\\Temp\\ipykernel_44224\\3559580778.py:22: SettingWithCopyWarning: \n",
19+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
20+
"\n",
21+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
22+
" X['Femoral_neck_angle'].fillna(X['Femoral_neck_angle'].mean(), inplace=True)\n",
23+
"C:\\Users\\36819\\AppData\\Local\\Temp\\ipykernel_44224\\3559580778.py:23: SettingWithCopyWarning: \n",
24+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
25+
"\n",
26+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
27+
" X['Lateral_center_edge_angle'].fillna(X['Lateral_center_edge_angle'].mean(), inplace=True)\n",
28+
"C:\\Users\\36819\\AppData\\Local\\Temp\\ipykernel_44224\\3559580778.py:24: SettingWithCopyWarning: \n",
29+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
30+
"\n",
31+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
32+
" X['extrusion_index'].fillna(X['extrusion_index'].mean(), inplace=True)\n"
33+
]
34+
},
35+
{
36+
"name": "stdout",
37+
"output_type": "stream",
38+
"text": [
39+
"Accuracy: 58.90%\n",
40+
" precision recall f1-score support\n",
41+
"\n",
42+
" 0 0.33 0.43 0.38 21\n",
43+
" 1 0.74 0.65 0.69 52\n",
44+
"\n",
45+
" accuracy 0.59 73\n",
46+
" macro avg 0.54 0.54 0.53 73\n",
47+
"weighted avg 0.62 0.59 0.60 73\n",
48+
"\n",
49+
"Number of iterations to reach convergence: 1883\n"
50+
]
51+
},
52+
{
53+
"data": {
54+
"text/plain": [
55+
"['cluster_model.joblib']"
56+
]
57+
},
58+
"execution_count": 5,
59+
"metadata": {},
60+
"output_type": "execute_result"
61+
}
62+
],
63+
"source": [
64+
"# Import Libraries\n",
65+
"import pandas as pd\n",
66+
"import numpy as np\n",
67+
"from sklearn.model_selection import train_test_split\n",
68+
"from sklearn.preprocessing import StandardScaler\n",
69+
"from sklearn.neural_network import MLPClassifier\n",
70+
"from sklearn.metrics import accuracy_score, classification_report\n",
71+
"from joblib import dump\n",
72+
"\n",
73+
"# Load Data\n",
74+
"file_path = ' ' #File path here\n",
75+
"data = pd.read_csv(file_path)\n",
76+
"\n",
77+
"# Select Features and Target\n",
78+
"features = ['Gender', 'Subluxation_percent', 'Femoral_neck_angle', 'Lateral_center_edge_angle', 'extrusion_index']\n",
79+
"target = 'Cluster' # Replace with the actual column name for the cluster\n",
80+
"X = data[features]\n",
81+
"y = data[target]\n",
82+
"\n",
83+
"# Preprocess Data\n",
84+
"X['Subluxation_percent'].fillna(X['Subluxation_percent'].mean(), inplace=True)\n",
85+
"X['Femoral_neck_angle'].fillna(X['Femoral_neck_angle'].mean(), inplace=True)\n",
86+
"X['Lateral_center_edge_angle'].fillna(X['Lateral_center_edge_angle'].mean(), inplace=True)\n",
87+
"X['extrusion_index'].fillna(X['extrusion_index'].mean(), inplace=True)\n",
88+
"if X['Gender'].dtype == 'object':\n",
89+
" X['Gender'] = X['Gender'].astype('category').cat.codes\n",
90+
"scaler = StandardScaler()\n",
91+
"X_scaled = scaler.fit_transform(X)\n",
92+
"\n",
93+
"# Split Data\n",
94+
"X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)\n",
95+
"\n",
96+
"# Build and Train ANN\n",
97+
"mlp = MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=2000, random_state=42) # Increase max_iter\n",
98+
"mlp.fit(X_train, y_train)\n",
99+
"\n",
100+
"# Evaluate Model\n",
101+
"y_pred = mlp.predict(X_test)\n",
102+
"accuracy = accuracy_score(y_test, y_pred)\n",
103+
"print(f\"Accuracy: {accuracy * 100:.2f}%\")\n",
104+
"print(classification_report(y_test, y_pred))\n",
105+
"\n",
106+
"# Display Number of Iterations\n",
107+
"print(f\"Number of iterations to reach convergence: {mlp.n_iter_}\")\n",
108+
"\n",
109+
"# Save Model (Optional)\n",
110+
"dump(mlp, 'cluster_model.joblib')\n"
111+
]
112+
},
113+
{
114+
"cell_type": "code",
115+
"execution_count": 6,
116+
"id": "c1d07011-f602-4411-a224-4938ca3ce7f9",
117+
"metadata": {},
118+
"outputs": [
119+
{
120+
"name": "stdout",
121+
"output_type": "stream",
122+
"text": [
123+
"Accuracy: 58.90%\n",
124+
"Precision: 73.91%\n",
125+
"Recall: 65.38%\n",
126+
"F1-Score: 69.39%\n",
127+
"Misclassification Rate: 41.10%\n",
128+
"AUC of ROC: 56.32%\n"
129+
]
130+
}
131+
],
132+
"source": [
133+
"#calculate metrics\n",
134+
"from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix\n",
135+
"\n",
136+
"# Predict Class Labels\n",
137+
"y_pred = mlp.predict(X_test)\n",
138+
"\n",
139+
"# Calculate Accuracy\n",
140+
"accuracy = accuracy_score(y_test, y_pred)\n",
141+
"\n",
142+
"# Calculate Precision\n",
143+
"precision = precision_score(y_test, y_pred)\n",
144+
"\n",
145+
"# Calculate Recall\n",
146+
"recall = recall_score(y_test, y_pred)\n",
147+
"\n",
148+
"# Calculate F1-Score\n",
149+
"f1 = f1_score(y_test, y_pred)\n",
150+
"\n",
151+
"# Calculate Misclassification Rate\n",
152+
"misclassification_rate = 1 - accuracy\n",
153+
"\n",
154+
"# Calculate AUC-ROC\n",
155+
"# Note: You'll need to use predict_proba to get the probability estimates for the positive class\n",
156+
"y_prob = mlp.predict_proba(X_test)[:, 1]\n",
157+
"auc_roc = roc_auc_score(y_test, y_prob)\n",
158+
"\n",
159+
"# Print Results\n",
160+
"print(f\"Accuracy: {accuracy * 100:.2f}%\")\n",
161+
"print(f\"Precision: {precision * 100:.2f}%\")\n",
162+
"print(f\"Recall: {recall * 100:.2f}%\")\n",
163+
"print(f\"F1-Score: {f1 * 100:.2f}%\")\n",
164+
"print(f\"Misclassification Rate: {misclassification_rate * 100:.2f}%\")\n",
165+
"print(f\"AUC of ROC: {auc_roc * 100:.2f}%\")\n"
166+
]
167+
},
168+
{
169+
"cell_type": "code",
170+
"execution_count": 7,
171+
"id": "93c0ad8a-e241-4bc4-a6cc-76e04879932f",
172+
"metadata": {},
173+
"outputs": [
174+
{
175+
"name": "stdout",
176+
"output_type": "stream",
177+
"text": [
178+
"File saved to C:/Work/AI_Sports_Medicine/Hip/DDH/Cluster/Prediction_ANN/internal_predicted_clusters.csv\n"
179+
]
180+
}
181+
],
182+
"source": [
183+
"##CSV of internal dataset\n",
184+
"# Create a new DataFrame with the original data\n",
185+
"new_data = data.copy()\n",
186+
"\n",
187+
"# Add the predicted cluster column\n",
188+
"new_data['predicted_cluster'] = mlp.predict(scaler.transform(X))\n",
189+
"\n",
190+
"# Save the new DataFrame as a CSV file\n",
191+
"output_path = ''\n",
192+
"new_data.to_csv(output_path, index=False)\n",
193+
"\n",
194+
"print(f\"File saved to {output_path}\")\n"
195+
]
196+
},
197+
{
198+
"cell_type": "code",
199+
"execution_count": 8,
200+
"id": "87e3670f-9246-4ef0-9e4c-b6a8d5dca0d1",
201+
"metadata": {},
202+
"outputs": [
203+
{
204+
"name": "stdout",
205+
"output_type": "stream",
206+
"text": [
207+
"External Accuracy: 71.43%\n",
208+
"External Precision: 78.69%\n",
209+
"External Recall: 81.36%\n",
210+
"External F1-Score: 80.00%\n",
211+
"External Misclassification Rate: 28.57%\n",
212+
"External AUC of ROC: 70.92%\n",
213+
"External file saved to C:/Work/AI_Sports_Medicine/Hip/DDH/Cluster/Prediction_ANN/external_predicted_clusters.csv\n"
214+
]
215+
},
216+
{
217+
"name": "stderr",
218+
"output_type": "stream",
219+
"text": [
220+
"C:\\Users\\36819\\AppData\\Local\\Temp\\ipykernel_44224\\4116426935.py:8: SettingWithCopyWarning: \n",
221+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
222+
"\n",
223+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
224+
" external_X['Subluxation_percent'].fillna(external_X['Subluxation_percent'].mean(), inplace=True)\n",
225+
"C:\\Users\\36819\\AppData\\Local\\Temp\\ipykernel_44224\\4116426935.py:9: SettingWithCopyWarning: \n",
226+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
227+
"\n",
228+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
229+
" external_X['Femoral_neck_angle'].fillna(external_X['Femoral_neck_angle'].mean(), inplace=True)\n",
230+
"C:\\Users\\36819\\AppData\\Local\\Temp\\ipykernel_44224\\4116426935.py:10: SettingWithCopyWarning: \n",
231+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
232+
"\n",
233+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
234+
" external_X['Lateral_center_edge_angle'].fillna(external_X['Lateral_center_edge_angle'].mean(), inplace=True)\n",
235+
"C:\\Users\\36819\\AppData\\Local\\Temp\\ipykernel_44224\\4116426935.py:11: SettingWithCopyWarning: \n",
236+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
237+
"\n",
238+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
239+
" external_X['extrusion_index'].fillna(external_X['extrusion_index'].mean(), inplace=True)\n"
240+
]
241+
}
242+
],
243+
"source": [
244+
"##External validation\n",
245+
"# Read the external dataset\n",
246+
"external_file_path = '' #File path here\n",
247+
"external_data = pd.read_csv(external_file_path)\n",
248+
"\n",
249+
"# Select features and preprocess\n",
250+
"external_X = external_data[features]\n",
251+
"external_X['Subluxation_percent'].fillna(external_X['Subluxation_percent'].mean(), inplace=True)\n",
252+
"external_X['Femoral_neck_angle'].fillna(external_X['Femoral_neck_angle'].mean(), inplace=True)\n",
253+
"external_X['Lateral_center_edge_angle'].fillna(external_X['Lateral_center_edge_angle'].mean(), inplace=True)\n",
254+
"external_X['extrusion_index'].fillna(external_X['extrusion_index'].mean(), inplace=True)\n",
255+
"if external_X['Gender'].dtype == 'object':\n",
256+
" external_X['Gender'] = external_X['Gender'].astype('category').cat.codes\n",
257+
"\n",
258+
"# Scale the features\n",
259+
"external_X_scaled = scaler.transform(external_X)\n",
260+
"\n",
261+
"# Groundtruth cluster label (updated column name)\n",
262+
"external_y = external_data['Groundtruth_Cluster']\n",
263+
"\n",
264+
"# Predict clusters for the external dataset\n",
265+
"external_y_pred = mlp.predict(external_X_scaled)\n",
266+
"\n",
267+
"# Calculate metrics\n",
268+
"external_accuracy = accuracy_score(external_y, external_y_pred)\n",
269+
"external_precision = precision_score(external_y, external_y_pred)\n",
270+
"external_recall = recall_score(external_y, external_y_pred)\n",
271+
"external_f1 = f1_score(external_y, external_y_pred)\n",
272+
"external_misclassification_rate = 1 - external_accuracy\n",
273+
"external_y_prob = mlp.predict_proba(external_X_scaled)[:, 1]\n",
274+
"external_auc_roc = roc_auc_score(external_y, external_y_prob)\n",
275+
"\n",
276+
"# Print results\n",
277+
"print(f\"External Accuracy: {external_accuracy * 100:.2f}%\")\n",
278+
"print(f\"External Precision: {external_precision * 100:.2f}%\")\n",
279+
"print(f\"External Recall: {external_recall * 100:.2f}%\")\n",
280+
"print(f\"External F1-Score: {external_f1 * 100:.2f}%\")\n",
281+
"print(f\"External Misclassification Rate: {external_misclassification_rate * 100:.2f}%\")\n",
282+
"print(f\"External AUC of ROC: {external_auc_roc * 100:.2f}%\")\n",
283+
"\n",
284+
"# Create a new DataFrame with the external data\n",
285+
"external_data_with_prediction = external_data.copy()\n",
286+
"\n",
287+
"# Adding the ground truth cluster labels (external_y) to the external predicted data DataFrame\n",
288+
"external_data_with_prediction['ground_truth_cluster'] = external_data['Groundtruth_Cluster']\n",
289+
"\n",
290+
"# Add the predicted cluster column\n",
291+
"external_data_with_prediction['predicted_cluster'] = external_y_pred\n",
292+
"\n",
293+
"# Save the new DataFrame as a CSV file\n",
294+
"external_output_path = ' '\n",
295+
"external_data_with_prediction.to_csv(external_output_path, index=False)\n",
296+
"\n",
297+
"print(f\"External file saved to {external_output_path}\")\n",
298+
"\n"
299+
]
300+
},
301+
{
302+
"cell_type": "code",
303+
"execution_count": null,
304+
"id": "561e6941-5395-47cb-9781-e572633afffb",
305+
"metadata": {},
306+
"outputs": [],
307+
"source": []
308+
}
309+
],
310+
"metadata": {
311+
"kernelspec": {
312+
"display_name": "Python 3 (ipykernel)",
313+
"language": "python",
314+
"name": "python3"
315+
},
316+
"language_info": {
317+
"codemirror_mode": {
318+
"name": "ipython",
319+
"version": 3
320+
},
321+
"file_extension": ".py",
322+
"mimetype": "text/x-python",
323+
"name": "python",
324+
"nbconvert_exporter": "python",
325+
"pygments_lexer": "ipython3",
326+
"version": "3.11.5"
327+
}
328+
},
329+
"nbformat": 4,
330+
"nbformat_minor": 5
331+
}

0 commit comments

Comments
 (0)