# This .py file is a conversion of a Google Colab notebook; the same code is also available as finance.ipynb
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
# Generate sample daily expense data for a user
daily_expenses = {
    'date': pd.date_range(start='2024-09-01', end='2024-09-30'),
    'amount': np.random.uniform(30, 80, size=30)
}
df_expenses = pd.DataFrame(daily_expenses)
# Preprocess data for LSTM
scaler = MinMaxScaler()
data = scaler.fit_transform(df_expenses[['amount']])
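# MinMaxScaler rescales the amounts into [0, 1]; LSTMs train more stably on
# normalized inputs, and predictions are inverse-transformed back below.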
# Prepare sequences for LSTM (e.g., using last 5 days to predict next day)
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length])
    return np.array(X), np.array(y)
seq_length = 5
X, y = create_sequences(data, seq_length)
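# With 30 days and seq_length=5, X has shape (25, 5, 1) -- 25 sliding windows
# of 5 scaled values -- and y has shape (25, 1), the value following each window.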
# Define the LSTM model
model = Sequential([
    LSTM(50, activation='relu', input_shape=(seq_length, 1)),
    Dense(1)
])
model.compile(optimizer='adam', loss='mse')
# Train the model
model.fit(X, y, epochs=100, verbose=1)
# Predict the next day's expense
predicted = model.predict(np.expand_dims(data[-seq_length:], axis=0))
predicted_expense = scaler.inverse_transform(predicted)[0][0]
# Plot the results
plt.figure(figsize=(12, 6))
plt.plot(df_expenses['date'], df_expenses['amount'], label='Actual Daily Expenses', color='blue')
plt.scatter(df_expenses['date'].iloc[-1] + pd.Timedelta(days=1), predicted_expense, color='red', label='Predicted Next Day Expense')
plt.title("Daily Expenses and Predicted Next Day Expense")
plt.xlabel("Date")
plt.ylabel("Expense Amount")
plt.legend()
plt.show()
print(f"Predicted Next Day Expense: {predicted_expense:.2f}")
# BERT-based transaction categorization
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset  # DataLoader is unnecessary here; Trainer handles batching
from sklearn.model_selection import train_test_split
# Step 1: Define the demo dataset with labeled transactions
transaction_texts = [
    "Bought groceries at Walmart",              # groceries
    "Netflix subscription",                     # entertainment
    "Paid apartment rent",                      # rent
    "Grocery shopping at Target",               # groceries
    "Spotify premium subscription",             # entertainment
    "Paid for groceries at Whole Foods",        # groceries
    "Purchased tickets for a concert",          # entertainment
    "Transferred money for rent",               # rent
    "Bought movie tickets for Saturday night",  # entertainment
    "Fresh vegetables and fruits",              # groceries
]
transaction_labels = [0, 1, 2, 0, 1, 0, 1, 2, 1, 0] # Labels: 0 = groceries, 1 = entertainment, 2 = rent
# Label to category mapping for later reference
label_map = {0: "groceries", 1: "entertainment", 2: "rent"}
# Step 2: Create a custom Dataset class for loading and processing data
class TransactionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=64):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }
# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Create train and validation splits
train_texts, val_texts, train_labels, val_labels = train_test_split(transaction_texts, transaction_labels, test_size=0.2, random_state=42)
train_dataset = TransactionDataset(train_texts, train_labels, tokenizer)
val_dataset = TransactionDataset(val_texts, val_labels, tokenizer)
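# Optional sanity check (illustrative, not in the original): confirm the tensor
# shapes a single dataset item yields before handing it to the Trainer.
sample = train_dataset[0]
print(sample['input_ids'].shape, sample['attention_mask'].shape, sample['labels'])
# expected: torch.Size([64]) torch.Size([64]) tensor(<label>)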
# Step 3: Load BERT model for classification with 3 categories
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)
# Step 4: Define the TrainingArguments and Trainer for fine-tuning
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    warmup_steps=10,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=5,
    evaluation_strategy="epoch",  # renamed to eval_strategy in recent transformers releases
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)
# Fine-tune the model
trainer.train()
# Step 5: Evaluate the model on validation data
eval_result = trainer.evaluate()
print(f"Evaluation result: {eval_result}")
# Step 6: Test predictions with fine-tuned model
test_data = [
    "Bought some groceries",
    "Paid rent for November",
    "Enjoyed a movie on Netflix"
]
model.eval()  # inference mode: disables dropout
inputs = tokenizer(test_data, return_tensors='pt', padding=True, truncation=True).to(model.device)
with torch.no_grad():  # no gradients needed for prediction
    outputs = model(**inputs)
predictions = torch.argmax(outputs.logits, dim=1)
predicted_categories = [label_map[pred.item()] for pred in predictions]
# Displaying predictions
for transaction, category in zip(test_data, predicted_categories):
    print(f"Transaction: {transaction} | Predicted Category: {category}")
# Q-learning model for daily budgeting advice
import numpy as np
import pandas as pd
data = {
    'day': list(range(1, 31)),                           # 30-day period
    'current_balance': np.random.randint(500, 1500, 30), # random current balance amounts
    'target_budget': [1000] * 30,                        # consistent target budget for simplicity
    'days_passed': list(range(1, 31)),
    'remaining_budget': np.random.randint(200, 800, 30)  # random remaining budget amounts
}
# Convert to DataFrame
spending_summary_df = pd.DataFrame(data)
# Initialize Q-table for RL model
num_days = 30 # Period in days
states = 100 # Discretized states for balance levels
actions = 3 # 0: "Stay on track", 1: "Increase savings", 2: "Adjust spending"
q_table = np.zeros((states, num_days, actions))
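# q_table[s, d, a] holds the learned value of taking action a on day d while in
# balance bucket s, so its shape is (states, num_days, actions) = (100, 30, 3).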
# Define parameters for the Q-learning model
learning_rate = 0.1
discount_factor = 0.9
exploration_rate = 0.5
exploration_decay = 0.95
min_exploration_rate = 0.1
# Training Loop over the spending summary dataset
for index, row in spending_summary_df.iterrows():
    # State representation based on spending balance and days passed
    target_budget = row['target_budget']
    current_balance_state = min(int(row['current_balance'] // (target_budget / states)), states - 1)
    day_state = row['days_passed'] - 1
    # Action selection: epsilon-greedy
    if np.random.uniform(0, 1) < exploration_rate:
        action = np.random.choice(actions)  # explore: random advice
    else:
        action = np.argmax(q_table[current_balance_state, day_state, :])  # exploit learned policy
    # Simulate reward based on action
    if action == 0:    # "Stay on track"
        reward = 1 if row['current_balance'] <= target_budget else -1
    elif action == 1:  # "Increase savings"
        reward = 2 if row['remaining_budget'] >= target_budget * 0.5 else -2
    else:              # action == 2: "Adjust spending"
        reward = 1 if row['remaining_budget'] >= target_budget * 0.2 else -1
    # Update Q-table using the Q-learning update rule
    next_day_state = min(day_state + 1, num_days - 1)
    # Simplification: the next balance state reuses the current row's balance,
    # since this synthetic dataset carries no transition dynamics
    next_balance_state = min(int(row['current_balance'] // (target_budget / states)), states - 1)
    best_future_action = np.argmax(q_table[next_balance_state, next_day_state, :])
    q_table[current_balance_state, day_state, action] += learning_rate * (
        reward + discount_factor * q_table[next_balance_state, next_day_state, best_future_action] -
        q_table[current_balance_state, day_state, action]
    )
    # Decay exploration rate
    exploration_rate = max(min_exploration_rate, exploration_rate * exploration_decay)
# Final Advice Generation
advice_map = {0: "Stay on track", 1: "Increase savings", 2: "Adjust spending"}
predictions = []
for day_state in range(num_days):
    # Aggregate advice per day by summing Q-values across all balance states
    recommended_action = np.argmax(q_table[:, day_state, :].sum(axis=0))
    predictions.append(advice_map[recommended_action])
# Display the predicted advice for each day
print("\nPredicted Advice for Each Day:")
for day, advice in enumerate(predictions, 1):
    print(f"Day {day}: {advice}")