added my notebook where I did work on a wine quality dataset #115

Open
wants to merge 10 commits into master
228 changes: 228 additions & 0 deletions p1ch6/1_neural_networks_wine_quality.ipynb
@@ -0,0 +1,228 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"import torch\n",
"import torch.optim as optim\n",
"import torch.nn as nn\n",
"import csv\n",
"torch.set_printoptions(edgeitems=2, linewidth=75)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Input train shapetorch.Size([3919, 11]), input validation torch.Size([979, 11])\n",
"Score onehot train shapetorch.Size([3919, 10]), Score onehot validation shapetorch.Size([979, 10])\n"
]
}
],
"source": [
"#First we are importing the dataset\n",
"\n",
"wine_path = \"../data/p1ch4/tabular-wine/winequality-white.csv\"\n",
"wineq_numpy = np.loadtxt(wine_path, dtype=np.float32, delimiter=\";\",\n",
" skiprows=1)\n",
"\n",
"wineq = torch.from_numpy(wineq_numpy)\n",
"\n",
"#Splitting scores from actual input\n",
"input_data = wineq[:, :-1] #\n",
"wine_score = wineq[:, -1].long() #\n",
"\n",
"#normalazing input data\n",
"input_data_mean = torch.mean(data, dim=0)\n",
"input_data_var = torch.var(data, dim=0)\n",
"input_data_normalized = (data - data_mean) / torch.sqrt(data_var)\n",
"\n",
"#splitting into train and val dataset\n",
"n_samples = data.shape[0]\n",
"n_val = int(0.2 * n_samples) #int is important)\n",
"\n",
"shuffled_indices = torch.randperm(n_samples)\n",
"\n",
"train_indices = shuffled_indices[:-n_val]\n",
"val_indices = shuffled_indices[-n_val:]\n",
"\n",
"input_data_train_norm = input_data_normalized[train_indices]\n",
"wine_score_train = wine_score[train_indices]\n",
"\n",
"input_data_val_norm = input_data_normalized[val_indices]\n",
"wine_score_val = wine_score[val_indices]\n",
"\n",
"print(f'Input train shape{input_data_train_norm.shape}, input validation {input_data_val_norm.shape}')\n",
"\n",
"#creating one_hot representation of our score\n",
"\n",
"wine_score_train_onehot = torch.zeros(wine_score_train.shape[0], 10)\n",
"wine_score_val_onehot = torch.zeros(wine_score_val.shape[0], 10)\n",
"wine_score_train_onehot.scatter_(1, wine_score_train.unsqueeze(1), 1.0)\n",
"wine_score_val_onehot.scatter_(1, wine_score_val.unsqueeze(1), 1.0)\n",
"\n",
"print(f'Score onehot train shape{wine_score_train_onehot.shape}, Score onehot validation shape{wine_score_val_onehot.shape}')"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1, Training loss 2.2654, Validation loss 2.2555\n",
"Epoch 1000, Training loss 0.8641, Validation loss 0.9909\n",
"Epoch 2000, Training loss 0.6084, Validation loss 1.0962\n",
"Epoch 3000, Training loss 0.4536, Validation loss 1.3623\n"
]
}
],
"source": [
"def training_loop(n_epochs, optimizer, model, loss_fn, input_data_train_norm, input_data_val_norm,\n",
" wine_score_train, wine_score_val):\n",
" for epoch in range(1, n_epochs + 1):\n",
" prediction_train = model(input_data_train_norm) # <1>\n",
" loss_train = loss_fn(prediction_train, wine_score_train)\n",
"\n",
" prediction_val = model(input_data_val_norm) # <1>\n",
" loss_val = loss_fn(prediction_val, wine_score_val)\n",
" \n",
" optimizer.zero_grad()\n",
" loss_train.backward() # <2>\n",
" optimizer.step()\n",
"\n",
" if epoch == 1 or epoch % 1000 == 0:\n",
" print(f\"Epoch {epoch}, Training loss {loss_train.item():.4f},\"\n",
" f\" Validation loss {loss_val.item():.4f}\")\n",
"\n",
"neuron_count = 100\n",
"\n",
"seq_model = nn.Sequential(\n",
" nn.Linear(11, neuron_count),\n",
" nn.Tanh(),\n",
" nn.Linear(neuron_count, 10))\n",
"\n",
"loss_fn = nn.CrossEntropyLoss()\n",
"optimizer = optim.Adam(seq_model.parameters())\n",
"\n",
"training_loop(\n",
" n_epochs = 3000, \n",
" optimizer = optimizer,\n",
" model = seq_model,\n",
" loss_fn = loss_fn,\n",
" input_data_train_norm = input_data_train_norm,\n",
" input_data_val_norm = input_data_val_norm, \n",
" wine_score_train = wine_score_train_onehot,\n",
" wine_score_val = wine_score_val_onehot)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([-5.3939, -5.1573, -5.0202, -3.7603, -2.3114, 2.6902, 6.2457,\n",
" 0.5261, -6.0117, -2.4114], grad_fn=<SelectBackward0>),\n",
" tensor([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]))"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"seq_model(data_train_norm)[2], target_train_onehot[2]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"scrolled": true
},
"outputs": [
{
"ename": "TypeError",
"evalue": "Adam.__init__() missing 1 required positional argument: 'params'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[39], line 13\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Assuming loss_fn is a loss function\u001b[39;00m\n\u001b[1;32m 12\u001b[0m loss_fn \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mMSELoss()\n\u001b[0;32m---> 13\u001b[0m optimizer \u001b[38;5;241m=\u001b[39m optim\u001b[38;5;241m.\u001b[39mAdam()\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m epoch \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m1000\u001b[39m):\n\u001b[1;32m 15\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mzero_grad()\n",
"\u001b[0;31mTypeError\u001b[0m: Adam.__init__() missing 1 required positional argument: 'params'"
]
}
],
"source": [
"neuron_count = 100\n",
"\n",
"seq_model = nn.Sequential(\n",
" nn.Linear(11, neuron_count),\n",
" nn.Tanh(),\n",
" nn.Linear(neuron_count, 10))\n",
"\n",
"\n",
"optimizer = optim.SGD(seq_model.parameters(), lr=1e-4)\n",
"\n",
"# Assuming loss_fn is a loss function\n",
"loss_fn = nn.MSELoss()\n",
"optimizer = optim.Adam()\n",
"for epoch in range(1000):\n",
" optimizer.zero_grad()\n",
" t_p_train = seq_model(data_train_norm)\n",
" # Convert target_train to the same data type as t_p_train\n",
" target_train = target_train.float()\n",
" loss_train = loss_fn(t_p_train, target_train)\n",
" loss_train.backward()\n",
" optimizer.step()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
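
Editor's note: the `scatter_` call in the first cell is the least obvious API used here; it writes 1.0 into each row of the zero tensor at the column index given by the score. A minimal standalone sketch of the same call on toy data (nothing assumed beyond the notebook's own `torch` import):

import torch

scores = torch.tensor([3, 5, 5, 6])               # integer class labels
onehot = torch.zeros(scores.shape[0], 10)
# dim=1: for each row i, set onehot[i, scores[i]] = 1.0
onehot.scatter_(1, scores.unsqueeze(1), 1.0)
print(onehot[0])  # 1.0 at index 3, zeros elsewhere

An equivalent built-in is `torch.nn.functional.one_hot(scores, num_classes=10)`, which returns an integer tensor that would still need a `.float()` cast before being used as an MSELoss target.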
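
Editor's note: the notebook tracks losses but never reports classification accuracy. A minimal follow-up cell, assuming the trained `seq_model` and the validation tensors from the cells above are still in scope:

with torch.no_grad():
    val_logits = seq_model(input_data_val_norm)
    val_pred = val_logits.argmax(dim=1)      # predicted class = largest logit

# compare against the integer scores, not the one-hot tensor
accuracy = (val_pred == wine_score_val).float().mean().item()
print(f'Validation accuracy: {accuracy:.3f}')

No softmax is needed: `nn.CrossEntropyLoss` applies `log_softmax` internally during training, and `argmax` over raw logits selects the same class as `argmax` over softmax probabilities.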
16 changes: 14 additions & 2 deletions p2ch10/dsets.py
@@ -9,6 +9,8 @@
import SimpleITK as sitk
import numpy as np

#import random

import torch
import torch.cuda
from torch.utils.data import Dataset
@@ -34,6 +36,8 @@ def getCandidateInfoList(requireOnDisk_bool=True):
# We construct a set with all series_uids that are present on disk.
# This will let us use the data, even if we haven't downloaded all of
# the subsets yet.
#There is a bug in the implementation: in the original structure the LUNA data
#is located under data, not data-unversioned
mhd_list = glob.glob('data-unversioned/part2/luna/subset*/*.mhd')
presentOnDisk_set = {os.path.split(p)[-1][:-4] for p in mhd_list}

@@ -82,10 +86,11 @@ def getCandidateInfoList(requireOnDisk_bool=True):

class Ct:
def __init__(self, series_uid):
#given a series UID, the glob below builds the path to the matching .mhd file on disk
mhd_path = glob.glob(
'data-unversioned/part2/luna/subset*/{}.mhd'.format(series_uid)
)[0]

#read the image with SimpleITK and convert it to a numpy array
ct_mhd = sitk.ReadImage(mhd_path)
ct_a = np.array(sitk.GetArrayFromImage(ct_mhd), dtype=np.float32)

@@ -103,6 +108,8 @@ def __init__(self, series_uid):
self.direction_a = np.array(ct_mhd.GetDirection()).reshape(3, 3)

def getRawCandidate(self, center_xyz, width_irc):
#having candidateInfo_list, we can look up the center_xyz value for a given
#series UID and pass it into this function
center_irc = xyz2irc(
center_xyz,
self.origin_xyz,
@@ -112,6 +119,8 @@ def getRawCandidate(self, center_xyz, width_irc):

slice_list = []
for axis, center_val in enumerate(center_irc):
#the start index sits half a width before the center, and the end index one
#full width after the start (e.g. center 50, width 32 -> start 34, end 66)
start_ndx = int(round(center_val - width_irc[axis]/2))
end_ndx = int(start_ndx + width_irc[axis])

@@ -135,11 +144,13 @@ def getRawCandidate(self, center_xyz, width_irc):

return ct_chunk, center_irc


#this caches the entire Ct object (only the single most recent one)
@functools.lru_cache(1, typed=True)
def getCt(series_uid):
return Ct(series_uid)

#this caches individual Ct chunks (there can be several per CT) on disk
#so they can be reused during training
@raw_cache.memoize(typed=True)
def getCtRawCandidate(series_uid, center_xyz, width_irc):
ct = getCt(series_uid)
@@ -153,6 +164,7 @@ def __init__(self,
series_uid=None,
):
self.candidateInfo_list = copy.copy(getCandidateInfoList())
#random.shuffle(self.candidateInfo_list)

if series_uid:
self.candidateInfo_list = [
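
Editor's note: the two cache decorators commented above form a common two-level pattern: `functools.lru_cache(1)` keeps the single most recently used Ct in memory (consecutive samples often come from the same CT, and loading one is expensive), while `raw_cache.memoize` persists individual chunks to disk across runs. A minimal standard-library sketch of the in-memory half (the on-disk half in this repo is backed by the project's diskcache-based `raw_cache`):

import functools

@functools.lru_cache(maxsize=1)    # keep only the most recently loaded volume
def load_volume(series_uid):
    print(f'expensive load of {series_uid}')
    return [series_uid] * 3        # stand-in for a large CT array

def get_chunk(series_uid, index):
    volume = load_volume(series_uid)   # cache hit if same series_uid as last call
    return volume[index]

get_chunk('uid-a', 0)   # prints 'expensive load of uid-a'
get_chunk('uid-a', 1)   # served from the in-memory cache
get_chunk('uid-b', 0)   # evicts uid-a and loads uid-b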