Final: After testing various configurations, the final model is trained for 100 epochs with a learning rate of 0.00005 on the full dataset, producing an accuracy of 87.137%.
Tickloop committed May 17, 2022
1 parent 1cd523a commit 2f7a2d3
Showing 4 changed files with 43 additions and 18 deletions.
21 changes: 12 additions & 9 deletions main.py
@@ -22,9 +22,11 @@ def train(model, datasets, mask_tree, max_epochs, eta):
optimizer = Adam(model.parameters(), lr=eta)
loss_criterion = CrossEntropyLoss()
interactions = { ep : { word : {} for word, label in datasets['train'] } for ep in range(max_epochs) }

for epoch in range(max_epochs):
i = 0
model.train()

for correct_word, correct_word_labels in datasets['train']:
features = get_default_features()
i += 1
@@ -34,7 +36,7 @@ def train(model, datasets, mask_tree, max_epochs, eta):
optimizer.zero_grad()

outputs = model(features)
guessed_word = get_word_beam_search(outputs, mask_tree)
guessed_word = get_word_beam_search(outputs, mask_tree, k=3)

word_loss = loss_criterion(outputs, correct_word_labels)

@@ -53,25 +55,26 @@ def train(model, datasets, mask_tree, max_epochs, eta):

if guessed_word == correct_word:
break


model.eval()
val_acc[epoch], _ = accuracy(model, datasets['train'], mask_tree)
# val_loss[epoch] = avg_loss(model, datasets['val'], mask_tree)
print(f"Epoch {epoch} / {max_epochs}, loss => {losses[epoch]}, full_acc => {val_acc[epoch]}")
val_loss[epoch] = avg_loss(model, datasets['train'], mask_tree)
print(f"Epoch {epoch} / {max_epochs}, loss => {losses[epoch]}, val_acc => {val_acc[epoch]}, val_loss => {val_loss[epoch]}")

if val_acc[epoch] > max_val_acc:
save_model(model, "100epoch_bigger_train_beam_4")
save_model(model, "100epoch_bigger_full")
max_val_acc = val_acc[epoch]

return losses, interactions

if __name__ == "__main__":
splits = [1.0, 0.0, 0]
splits = [1.0, 0, 0]
mask_tree = get_mask_tree("data/official.txt")
dataset = get_dataset("data/official.txt")
datasets = get_split_dataset(dataset, splits)

b1 = BaseModel(in_features=26 * 12)
b1_loss, interaction_history = train(b1, datasets, mask_tree, max_epochs=100, eta=0.00005)

save_history(interaction_history, "interaction_history_17.json")
save_loss(b1_loss, "100epoch_bigger_train_beam_4.npy")
save_history(interaction_history, "final_interaction_history.json")
save_loss(b1_loss, "100epoch_bigger_full.npy")
2 changes: 0 additions & 2 deletions models.py
@@ -24,7 +24,6 @@ def __init__(self, in_features):

self.flatten = nn.Flatten(start_dim=0)
self.activation = nn.ReLU()
self.softmax = nn.Softmax(dim=0)

def forward(self, x):
output = self.flatten(x)
@@ -33,7 +32,6 @@ def forward(self, x):
outputs = torch.empty((5, 26))
for i, layer in enumerate(self.output_char_layers):
outputs[i] = layer(output)
# outputs[i] = self.softmax(outputs[i])

return outputs

3 changes: 1 addition & 2 deletions utils.py
@@ -252,7 +252,7 @@ def get_mask_tree(wordlist_path : str) -> dict:
return mask_tree


def get_word_beam_search(outputs : torch.Tensor, mask_tree : dict) -> str:
def get_word_beam_search(outputs : torch.Tensor, mask_tree : dict, k : int = 3) -> str:
"""
To convert the output of our model to a word that can be made sense of, we use this function.
Rather than taking the argmax independent of the underlying word distribution, we carry out a beam search to find the optimal
@@ -264,7 +264,6 @@ def get_word_beam_search(outputs : torch.Tensor, mask_tree : dict) -> str:
"""
# initialize
soft_outputs = torch.nn.functional.softmax(outputs, dim=1)
k = 3
mask = mask_tree[0]
mask = torch.tensor(mask)
mask = mask * soft_outputs[0]
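The hunk above only shows the first lines of the masked beam search, so here is a minimal sketch of the idea, not the repository's exact implementation. It assumes a hypothetical prefix_mask(prefix) helper that returns a length-26 0/1 tensor of letters allowed after a given prefix; in the real code that role is played by lookups into mask_tree, whose structure is not visible in this diff.

import torch

def beam_search_sketch(outputs: torch.Tensor, prefix_mask, k: int = 3) -> str:
    # outputs: (5, 26) logits, one row of letter scores per character position
    probs = torch.nn.functional.softmax(outputs, dim=1)
    beams = [("", 1.0)]  # (prefix, probability) pairs
    for pos in range(outputs.shape[0]):
        candidates = []
        for prefix, score in beams:
            # zero out letters the wordlist forbids after this prefix
            masked = probs[pos] * prefix_mask(prefix)
            top = torch.topk(masked, k)
            for p, idx in zip(top.values, top.indices):
                if p > 0:
                    candidates.append((prefix + chr(ord('a') + idx.item()), score * p.item()))
        # keep only the k highest-scoring prefixes before moving to the next position
        beams = sorted(candidates, key=lambda c: c[1], reverse=True)[:k]
    return beams[0][0] if beams else ""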
35 changes: 30 additions & 5 deletions visualize.py
@@ -142,7 +142,7 @@ def accuracy_on_output(one_epoch_interaction : dict) -> float:
count += 1
return round(100. * acc / count, 3)

def accuracy_on_dataset(model_path : str, wordlist_path : str, dataset_name : str) -> tuple:
def accuracy_on_dataset(model_path : str, wordlist_path : str, dataset_name : str, k : int = 3) -> tuple:
"""
Given a model path, wordlist path, and the dataset name from {'train', 'test', 'val'},
finds the accuracy on the given dataset.
@@ -157,16 +157,19 @@ def accuracy_on_dataset(model_path : str, wordlist_path : str, dataset_name : st
`dataset_name`: The default split on the loaded wordlist will be [0.8, 0.05, 0.15] for
{'train', 'val', 'test'}. The dataset_name specifies which dataset to use to find this accuracy.
`k`: The number of words to track in beam search. Increasing this number makes search slower.
Return:
`results`: A dict, storing the attempts that the model made for each word in the specified dataset.
`accuracy`: A float multiplied by 100 to give % of accuracy
"""
splits = [1.0, 0, 0]
splits = [0.8, 0.05, 0]
dataset = get_dataset(wordlist_path)
datasets = get_split_dataset(dataset, splits)
mask_tree = get_mask_tree(wordlist_path)

model = torch.load(model_path)
model.eval()
acc, count = 0., 0.
results = {word : {} for word, label in datasets[dataset_name]}

@@ -179,7 +182,7 @@ def accuracy_on_dataset(model_path : str, wordlist_path : str, dataset_name : st

for attempt in range(6):
output = model(features)
guessed_word = get_word_beam_search(output, mask_tree)
guessed_word = get_word_beam_search(output, mask_tree, k)
feedback = get_feedback(guessed_word, correct_word)
features = get_updated_features(features, feedback, guessed_word)
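A quick usage sketch of the updated signature, with the beam width threaded through the new k parameter (the checkpoint path below is illustrative, not necessarily one produced by this commit):

results, acc = accuracy_on_dataset("models/100epoch_bigger_full", "data/official.txt", "train", k=3)
print(f"train accuracy: {acc}%")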

@@ -254,6 +257,20 @@ def show_guess_distribution(results : dict):
plt.show()
plt.close()

def k_variation_beam_search(model_name : str) -> None:
print(model_name)
ks = [1, 3, 5, 10]
results, acc = {}, {}
for k in ks:
results[k], acc[k] = accuracy_on_dataset(model_name, "data/official.txt", "train", k)

for k in ks:
print(f"Accuracy for k = {k}: {acc[k]}%")
show_guess_distribution(results[k])

print("")


def print_model_statistics(model_name : str) -> None:
"""
This is used to quickly see the statistics like interaction history and accuracy on different
@@ -283,7 +300,7 @@ def print_model_statistics(model_name : str) -> None:
print(f"Words guessed in vocab(val): {in_vocab['val']}%")
print(f"Words guessed in vocab(test): {in_vocab['test']}%")

show_guess_distribution(results['train'])
# show_guess_distribution(results['train'])
print("")

if __name__ == "__main__":
@@ -303,8 +320,16 @@ def print_model_statistics(model_name : str) -> None:
# print_model_statistics("models/100epoch_bigger_train_beam")
# print_model_statistics("models/100epoch_bigger_train_beam_2")
# print_model_statistics("models/100epoch_bigger_train_beam_3")
print_model_statistics("models/100epoch_bigger_train_beam_4")
# print_model_statistics("models/100epoch_bigger_train_beam_4")

# print_model_statistics("models/25epoch_biggest_train_beam")
# print_model_statistics("models/25epoch_biggest_train_beam_2")

# k_variation_beam_search("models/100epoch_bigger_train_beam_4")

# print_model_statistics("models/25epoch_bigger_train_beam_k1")
# print_model_statistics("models/25epoch_bigger_train_beam_k3")
# print_model_statistics("models/25epoch_bigger_train_beam_k5")
# print_model_statistics("models/25epoch_bigger_train_beam_k10")

print_model_statistics("models/100epoch_bigger_train")
